#devtools::install_github("dgolicher/usdata") Install data package by uncommenting this line
library(tidyverse)
library(tmap)
library(mapview)
library(sjPlot)
library(usdata)
data(ukdata)
msoa_wave<-merge(msoa_wave,msoas_area)
msoa_wave %>% mutate(density=pop/(st_areasha/10000)) ->msoa_wave
msoa_wave %>% mutate(pcovid =covid/All) -> msoa_wave
msoa_wave$Region<-as.factor(msoa_wave$RegionNation)
msoa_wave %>% filter(wave=="wave1") ->w
w %>% mutate(response =1000*(All/over60)) -> w
g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Log10 house prices")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Population density")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=log10(price)), legend=FALSE) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Log10 house prices") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Population density") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")
scale.center <- function(x){
(x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>% mutate(density=scale.center(density)) %>% mutate(pcovid =covid/All)-> ww
mod<-lm(data=ww, response~Region)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region+ price)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region+ price +density)
plot_model(mod, sort.est = TRUE) -> fig1
fig1
mod<-lm(data=ww, response ~Region+ price +density + pcovid)
plot_model(mod, sort.est = TRUE) -> fig1
fig1
mod<-lm(data=ww, response ~Region* price)
plot_model(mod)
The response variable is the total number of deaths over the time period of the first and second wave respectively divided by the population over 60 expressed as deaths per thousand people over the age of sixty.
The first model shows differences between regions, ignoring any trend associated with the within region house price for the MSOA. This is equivalent to plotting out the mean deaths per region over the msoas with confidence intervals.
The second model uses logged house prices. However these are centred and scaled to lie between 0 and 1 for each of the regions. An additive model is then built using this scaled relative house prices as input. The interpretation of the coefficient for price is that the effect (slope of the regression line) is uniform across the country after scaling the variability within each region. An unscaled model would be unsuitable as it would pick up on differences in house prices between regions. This approach avoids that artefact and separates regional differences from differences in their mean house prices.
The third model includes an interaction term. This allows any differences for within region trends associated with house prices to be detected. So, if the slope of the line is steeper of flatter in some regions this will show up as an interaction. This is the most complete moel and will tend to have the best statistical properties when diagnostics are investigated. However it the hardest to interpret directly.
msoahex %>% right_join(w, by="msoa11cd") ->w_geo
library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")[7:1]
qtm(backhex) +
tm_shape(w_geo) +
tm_fill("response", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)
msoa_wave %>% filter(wave=="wave1") ->w
w %>% mutate(response =1000*(covid/over60)) -> w
g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("Covid deaths in the first wave \n Per thousand people over 60") + xlab("Log10 house prices")
g0 + geom_point(size=0.1, aes(col=Region)) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Population density")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("Covid deaths in the first wave \n Per thousand people over 60") + xlab("Log10 house prices") +facet_wrap(~RegionNation)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Population density") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")
scale.center <- function(x){
(x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>% mutate(density=scale.center(density)) -> ww
mod<-lm(data=ww, response~Region)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region+ price)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region+ price +density)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region* price)
plot_model(mod)
The response variable is the total number of deaths over the time period of the first and second wave respectively divided by the population over 60 expressed as deaths per thousand people over the age of sixty. The first model shows differences between regions, ignoring any trend associated with the within region house price for the MSOA. This is equivalent to plotting out the mean deaths per region over the msoas with confidence intervals.
The second model uses logged house prices. However these are centred and scaled to lie between 0 and 1 for each of the regions. An additive model is then built using this scaled relative house prices as input. The interpretation of the coefficient for price is that the effect (slope of the regression line) is uniform across the country after scaling the variability within each region. An unscaled model would be unsuitable as it would pick up on differences in house prices between regions. This approach avoids that artefact and separates regional differences from differences in their mean house prices.
The third model includes an interaction term. This allows any differences for within region trends associated with house prices to be detected. So, if the slope of the line is steeper of flatter in some regions this will show up as an interaction. This is the most complete moel and will tend to have the best statistical properties when diagnostics are investigated. However it the hardest to interpret directly.
msoahex %>% right_join(w, by="msoa11cd") ->w_geo
library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")[7:1]
qtm(backhex) +
tm_shape(w_geo) +
tm_fill("response", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)
msoa_wave$Region<-as.factor(msoa_wave$RegionNation)
msoa_wave %>% filter(wave=="wave2") ->w
w %>% mutate(response =1000*(All/over60)) -> w
g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("All cause deaths in the second wave \n Per thousand people over 60") + xlab("Log10 house prices")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the second wave \n Per thousand people over 60") + xlab("Population density")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=log10(price)), legend=FALSE) + ylab("All cause deaths in the second wave \n Per thousand people over 60") + xlab("Log10 house prices") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the second wave \n Per thousand people over 60") + xlab("Population density") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")
scale.center <- function(x){
(x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% mutate(pcovid=scale.center(pcovid)) ->w
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>% mutate(density=scale.center(density)) %>% mutate(pcovid =covid/All)-> ww
mod<-lm(data=ww, response~Region)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region+ price)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region+ price +density)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region+ price +density + pcovid)
plot_model(mod, sort.est = TRUE) -> fig1
fig1
mod<-lm(data=ww, response ~Region* price)
plot_model(mod)
The first model shows differences between regions, ignoring any trend associated with the within region house price for the MSOA. This is equivalent to plotting out the mean deaths per region over the msoas with confidence intervals.
The second model uses logged house prices. However these are centred and scaled to lie between 0 and 1 for each of the regions. An additive model is then built using this scaled relative house prices as input. The interpretation of the coefficient for price is that the effect (slope of the regression line) is uniform across the country after scaling the variability within each region. An unscaled model would be unsuitable as it would pick up on differences in house prices between regions. This approach avoids that artefact and separates regional differences from differences in their mean house prices.
The third model includes an interaction term. This allows any differences for within region trends associated with house prices to be detected. So, if the slope of the line is steeper of flatter in some regions this will show up as an interaction. This is the most complete moel and will tend to have the best statistical properties when diagnostics are investigated. However it the hardest to interpret directly.
msoahex %>% right_join(w, by="msoa11cd") ->w_geo
library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")[7:1]
qtm(backhex) +
tm_shape(w_geo) +
tm_fill("response", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)
msoa_wave %>% filter(wave=="wave2") ->w
w %>% mutate(response =1000*(covid/over60)) -> w
g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("Covid deaths in the second wave \n Per thousand people over 60") + xlab("Log10 house prices")
g0 + geom_point(size=0.1, aes(col=Region)) + geom_smooth(se=FALSE)
g0<-ggplot(w,aes(y=response, x=density)) + ylab("Covid deaths in the second wave \n Per thousand people over 60") + xlab("Population density")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("Covid deaths in the second wave \n Per thousand people over 60") + xlab("Log10 house prices") +facet_wrap(~RegionNation)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")
g0<-ggplot(w,aes(y=response, x=density)) + ylab("Covid deaths in the second wave \n Per thousand people over 60") + xlab("Population density") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")
scale.center <- function(x){
(x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>% mutate(density=scale.center(density)) -> ww
mod<-lm(data=ww, response~Region)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region+ price)
plot_model(mod, sort.est = TRUE)
mod<-lm(data=ww, response ~Region+ price + density)
plot_model(mod, sort.est = TRUE)
summary(mod)
##
## Call:
## lm(formula = response ~ Region + price + density, data = ww)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1514 -1.0596 -0.2997 0.6890 22.3213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.34778 0.07450 31.515 < 2e-16 ***
## RegionEast of England -1.17354 0.09935 -11.812 < 2e-16 ***
## RegionLondon -0.86921 0.09375 -9.272 < 2e-16 ***
## RegionNorth East 0.43365 0.12208 3.552 0.000385 ***
## RegionNorth West 0.88102 0.09482 9.291 < 2e-16 ***
## RegionSouth East -1.17573 0.09176 -12.813 < 2e-16 ***
## RegionSouth West -1.33021 0.09947 -13.373 < 2e-16 ***
## RegionWest Midlands -0.08824 0.09938 -0.888 0.374619
## RegionYorkshire and The Humber 0.81292 0.10182 7.984 1.66e-15 ***
## price 3.70479 0.14779 25.067 < 2e-16 ***
## density 1.33800 0.14698 9.103 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.783 on 6779 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2643, Adjusted R-squared: 0.2632
## F-statistic: 243.6 on 10 and 6779 DF, p-value: < 2.2e-16
anova(mod)
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## Region 8 4840.3 605.03 190.256 < 2.2e-16 ***
## price 1 2641.6 2641.58 830.660 < 2.2e-16 ***
## density 1 263.5 263.54 82.873 < 2.2e-16 ***
## Residuals 6779 21557.9 3.18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod)
## 2.5 % 97.5 %
## (Intercept) 2.2017376 2.4938154
## RegionEast of England -1.3682953 -0.9787756
## RegionLondon -1.0529841 -0.6854412
## RegionNorth East 0.1943394 0.6729634
## RegionNorth West 0.6951400 1.0669092
## RegionSouth East -1.3556096 -0.9958499
## RegionSouth West -1.5251980 -1.1352127
## RegionWest Midlands -0.2830594 0.1065763
## RegionYorkshire and The Humber 0.6133129 1.0125290
## price 3.4150654 3.9945097
## density 1.0498798 1.6261246
plot(mod)
mod<-lm(data=ww, response ~Region* price)
plot_model(mod)
The first model shows differences between regions, ignoring any trend associated with the within region house price for the MSOA. This is equivalent to plotting out the mean deaths per region over the msoas with confidence intervals.
The second model uses logged house prices. However these are centred and scaled to lie between 0 and 1 for each of the regions. An additive model is then built using this scaled relative house prices as input. The interpretation of the coefficient for price is that the effect (slope of the regression line) is uniform across the country after scaling the variability within each region. An unscaled model would be unsuitable as it would pick up on differences in house prices between regions. This approach avoids that artefact and separates regional differences from differences in their mean house prices.
The third model includes an interaction term. This allows any differences for within region trends associated with house prices to be detected. So, if the slope of the line is steeper of flatter in some regions this will show up as an interaction. This is the most complete moel and will tend to have the best statistical properties when diagnostics are investigated. However it the hardest to interpret directly.
msoahex %>% right_join(w, by="msoa11cd") ->w_geo
library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")[7:1]
qtm(backhex) +
tm_shape(w_geo) +
tm_fill("response", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)
msoa_wave %>% mutate(response =1000*(All/over60)) %>% filter(response<100)-> ms
ms %>% pivot_wider(id_cols=c("Region","msoa11cd", "price"),names_from = wave,values_from = response) -> w
g0<-ggplot(w,aes(y=wave1, x=wave2)) + ylab("All cause second wave deaths \n Per thousand people over 60") + xlab("All cause first wave deaths \n Per thousand people over 60")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm") +facet_wrap(~Region)
msoa_wave %>% mutate(response =1000*(covid/over60)) %>% filter(response<12)-> ms
ms %>% pivot_wider(id_cols=c("Region","msoa11cd", "price"),names_from = wave,values_from = response) -> w
g0<-ggplot(w,aes(x=wave1, y=wave2)) + ylab("Covid second wave deaths \n Per thousand people over 60") + xlab("Covid first wave deaths \n Per thousand people over 60")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm") +facet_wrap(~Region)
House prices and wave 1 deaths both centred and scaled to region.
scale.center <- function(x){
(x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>% mutate(wave1=scale.center(wave1)) -> ww
mod<-lm(data=ww, wave2 ~ wave1+Region+ price)
plot_model(mod)
# library(readxl)
# d <- read_excel("hlereferencetable2_tcm77-417533.xls",
# sheet = "UTLA_males", skip = 7, n_max = 152)
# names(d)<-gsub(" ","_",names(d))
# d<-d[,c(2,3,7,10)]
# names(d)<-c("LA","Healthy_life_expectancy","Life_expectancy","Proportion_of_life_healthy")
# healthy<-d
data(healthy)
d<-healthy
DT::datatable(d,
filter = "top",
extensions = c('Buttons'), options = list(
dom = 'Blfrtip',
buttons = c('copy', 'csv', 'excel'), colReorder = TRUE
))
Note that some codes do not coincide, so some Local authorities are missing. The general pattern is shown.
library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")
names(lashex)[2]<-"LA"
dd<-merge(lashex,d)
tmap::qtm(backhex) + tm_shape(dd) +
tm_fill("Life_expectancy", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)
hist(d$Life_expectancy)
hist(d$Healthy_life_expectancy)
The data shown are from the ONS.