#devtools::install_github("dgolicher/usdata") Install data package by uncommenting this line
library(tidyverse)
library(tmap)
library(mapview)
library(sjPlot)
library(usdata)
data(ukdata)

First wave (up to 2020-8-15) deaths

All cause deaths per thousand people over 60

Total over all msoas

House prices

msoa_wave<-merge(msoa_wave,msoas_area)
msoa_wave %>% mutate(density=pop/(st_areasha/10000)) ->msoa_wave
msoa_wave  %>% mutate(pcovid =covid/All) -> msoa_wave 
msoa_wave$Region<-as.factor(msoa_wave$RegionNation)
msoa_wave %>% filter(wave=="wave1") ->w
w %>% mutate(response =1000*(All/over60)) -> w

g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Log10 house prices")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")

Population density

g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Population density")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")

By region

House prices

g0<-ggplot(w,aes(y=response, x=log10(price)), legend=FALSE) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Log10 house prices") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")

Population density

g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Population density") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")

Statistical models

scale.center <- function(x){
  (x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>%  mutate(density=scale.center(density)) %>% mutate(pcovid =covid/All)-> ww

Region

Plot
mod<-lm(data=ww, response~Region)
plot_model(mod, sort.est = TRUE)

Region + price

Plot
mod<-lm(data=ww, response ~Region+ price)
plot_model(mod, sort.est = TRUE)

Region + price + density

Plot
mod<-lm(data=ww, response ~Region+ price +density)
plot_model(mod, sort.est = TRUE) -> fig1
fig1

Region + price + density + proprtion of covid

Plot
mod<-lm(data=ww, response ~Region+ price +density + pcovid)
plot_model(mod, sort.est = TRUE) -> fig1
fig1

Region and price interactions

Plot
mod<-lm(data=ww, response ~Region* price)
plot_model(mod)

Explanation of model fitting

The response variable is the total number of deaths over the time period of the first and second wave respectively divided by the population over 60 expressed as deaths per thousand people over the age of sixty.

The first model shows differences between regions, ignoring any trend associated with the within region house price for the MSOA. This is equivalent to plotting out the mean deaths per region over the msoas with confidence intervals.

The second model uses logged house prices. However these are centred and scaled to lie between 0 and 1 for each of the regions. An additive model is then built using this scaled relative house prices as input. The interpretation of the coefficient for price is that the effect (slope of the regression line) is uniform across the country after scaling the variability within each region. An unscaled model would be unsuitable as it would pick up on differences in house prices between regions. This approach avoids that artefact and separates regional differences from differences in their mean house prices.

The third model includes an interaction term. This allows any differences for within region trends associated with house prices to be detected. So, if the slope of the line is steeper of flatter in some regions this will show up as an interaction. This is the most complete moel and will tend to have the best statistical properties when diagnostics are investigated. However it the hardest to interpret directly.

Mapped

msoahex %>% right_join(w, by="msoa11cd") ->w_geo
library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")[7:1]
qtm(backhex) +
tm_shape(w_geo) +
  tm_fill("response", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)

Covid deaths deaths per thousand of the population over 60

Total over all msoas

House prices

msoa_wave %>% filter(wave=="wave1") ->w
w %>% mutate(response =1000*(covid/over60)) -> w

g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("Covid deaths in the first wave \n Per thousand people over 60") + xlab("Log10 house prices")
g0 + geom_point(size=0.1, aes(col=Region)) + geom_smooth(method="lm")

Population density

g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Population density")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")

By region

House prices

g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("Covid deaths in the first wave \n Per thousand people over 60") + xlab("Log10 house prices") +facet_wrap(~RegionNation)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")

Population density

g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the first wave \n Per thousand people over 60") + xlab("Population density") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")

Statistical models

scale.center <- function(x){
  (x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>%  mutate(density=scale.center(density)) -> ww

Region

Plot
mod<-lm(data=ww, response~Region)
plot_model(mod, sort.est = TRUE)

Region + price

Plot
mod<-lm(data=ww, response ~Region+ price)
plot_model(mod, sort.est = TRUE)

Region + price + density

Plot
mod<-lm(data=ww, response ~Region+ price +density)
plot_model(mod, sort.est = TRUE)

Region and price interactions

Plot
mod<-lm(data=ww, response ~Region* price)
plot_model(mod)

Explanation of model fitting

The response variable is the total number of deaths over the time period of the first and second wave respectively divided by the population over 60 expressed as deaths per thousand people over the age of sixty. The first model shows differences between regions, ignoring any trend associated with the within region house price for the MSOA. This is equivalent to plotting out the mean deaths per region over the msoas with confidence intervals.

The second model uses logged house prices. However these are centred and scaled to lie between 0 and 1 for each of the regions. An additive model is then built using this scaled relative house prices as input. The interpretation of the coefficient for price is that the effect (slope of the regression line) is uniform across the country after scaling the variability within each region. An unscaled model would be unsuitable as it would pick up on differences in house prices between regions. This approach avoids that artefact and separates regional differences from differences in their mean house prices.

The third model includes an interaction term. This allows any differences for within region trends associated with house prices to be detected. So, if the slope of the line is steeper of flatter in some regions this will show up as an interaction. This is the most complete moel and will tend to have the best statistical properties when diagnostics are investigated. However it the hardest to interpret directly.

Mapped

msoahex %>% right_join(w, by="msoa11cd") ->w_geo
library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")[7:1]
qtm(backhex) +
tm_shape(w_geo) +
  tm_fill("response", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)

Second wave (after 2020-8-15) deaths

All cause deaths per thousand people over 60

Total over all msoas

House prices

msoa_wave$Region<-as.factor(msoa_wave$RegionNation)
msoa_wave %>% filter(wave=="wave2") ->w
w %>% mutate(response =1000*(All/over60)) -> w
g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("All cause deaths in the second wave \n Per thousand people over 60") + xlab("Log10 house prices")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")

Population density

g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the second wave \n Per thousand people over 60") + xlab("Population density")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")

By region

House prices

g0<-ggplot(w,aes(y=response, x=log10(price)), legend=FALSE) + ylab("All cause deaths in the second wave \n Per thousand people over 60") + xlab("Log10 house prices") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")

Population density

g0<-ggplot(w,aes(y=response, x=density)) + ylab("All cause deaths in the second wave \n Per thousand people over 60") + xlab("Population density") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")

Statistical models

scale.center <- function(x){
  (x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% mutate(pcovid=scale.center(pcovid)) ->w
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>%  mutate(density=scale.center(density)) %>% mutate(pcovid =covid/All)-> ww

Region

Plot
mod<-lm(data=ww, response~Region)
plot_model(mod, sort.est = TRUE)

Region + price

Plot
mod<-lm(data=ww, response ~Region+ price)
plot_model(mod, sort.est = TRUE)

Region + price + density

mod<-lm(data=ww, response ~Region+ price +density)
plot_model(mod, sort.est = TRUE)

Region + price + density + proportion covid

mod<-lm(data=ww, response ~Region+ price +density + pcovid)
plot_model(mod, sort.est = TRUE) -> fig1
fig1

Region and price interactions

Plot
mod<-lm(data=ww, response ~Region* price)
plot_model(mod)

Explanation of model fitting

The first model shows differences between regions, ignoring any trend associated with the within region house price for the MSOA. This is equivalent to plotting out the mean deaths per region over the msoas with confidence intervals.

The second model uses logged house prices. However these are centred and scaled to lie between 0 and 1 for each of the regions. An additive model is then built using this scaled relative house prices as input. The interpretation of the coefficient for price is that the effect (slope of the regression line) is uniform across the country after scaling the variability within each region. An unscaled model would be unsuitable as it would pick up on differences in house prices between regions. This approach avoids that artefact and separates regional differences from differences in their mean house prices.

The third model includes an interaction term. This allows any differences for within region trends associated with house prices to be detected. So, if the slope of the line is steeper of flatter in some regions this will show up as an interaction. This is the most complete moel and will tend to have the best statistical properties when diagnostics are investigated. However it the hardest to interpret directly.

Mapped

msoahex %>% right_join(w, by="msoa11cd") ->w_geo
library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")[7:1]
qtm(backhex) +
tm_shape(w_geo) +
  tm_fill("response", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)

Covid deaths deaths per thousand of the population over 60

Total over all msoas

House prices

msoa_wave %>% filter(wave=="wave2") ->w
w %>% mutate(response =1000*(covid/over60)) -> w

g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("Covid deaths in the second wave \n Per thousand people over 60") + xlab("Log10 house prices")
g0 + geom_point(size=0.1, aes(col=Region)) + geom_smooth(se=FALSE)

Population density

g0<-ggplot(w,aes(y=response, x=density)) + ylab("Covid deaths in the second wave \n Per thousand people over 60") + xlab("Population density")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm")

By region

House prices

g0<-ggplot(w,aes(y=response, x=log10(price))) + ylab("Covid deaths in the second wave \n Per thousand people over 60") + xlab("Log10 house prices") +facet_wrap(~RegionNation)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")

Population density

g0<-ggplot(w,aes(y=response, x=density)) + ylab("Covid deaths in the second wave \n Per thousand people over 60") + xlab("Population density") +facet_wrap(~Region)
g0 + geom_point(size=0.1) + geom_smooth(method="lm")

Statistical models

scale.center <- function(x){
  (x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>%  mutate(density=scale.center(density)) -> ww

Region

Plot
mod<-lm(data=ww, response~Region)
plot_model(mod, sort.est = TRUE)

Region + price

Plot
mod<-lm(data=ww, response ~Region+ price)
plot_model(mod, sort.est = TRUE)

Region + price + density

Plot
mod<-lm(data=ww, response ~Region+ price + density)
plot_model(mod, sort.est = TRUE)

Diagnostics
summary(mod)
## 
## Call:
## lm(formula = response ~ Region + price + density, data = ww)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1514 -1.0596 -0.2997  0.6890 22.3213 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     2.34778    0.07450  31.515  < 2e-16 ***
## RegionEast of England          -1.17354    0.09935 -11.812  < 2e-16 ***
## RegionLondon                   -0.86921    0.09375  -9.272  < 2e-16 ***
## RegionNorth East                0.43365    0.12208   3.552 0.000385 ***
## RegionNorth West                0.88102    0.09482   9.291  < 2e-16 ***
## RegionSouth East               -1.17573    0.09176 -12.813  < 2e-16 ***
## RegionSouth West               -1.33021    0.09947 -13.373  < 2e-16 ***
## RegionWest Midlands            -0.08824    0.09938  -0.888 0.374619    
## RegionYorkshire and The Humber  0.81292    0.10182   7.984 1.66e-15 ***
## price                           3.70479    0.14779  25.067  < 2e-16 ***
## density                         1.33800    0.14698   9.103  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.783 on 6779 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2643, Adjusted R-squared:  0.2632 
## F-statistic: 243.6 on 10 and 6779 DF,  p-value: < 2.2e-16
anova(mod)
## Analysis of Variance Table
## 
## Response: response
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## Region       8  4840.3  605.03 190.256 < 2.2e-16 ***
## price        1  2641.6 2641.58 830.660 < 2.2e-16 ***
## density      1   263.5  263.54  82.873 < 2.2e-16 ***
## Residuals 6779 21557.9    3.18                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod)
##                                     2.5 %     97.5 %
## (Intercept)                     2.2017376  2.4938154
## RegionEast of England          -1.3682953 -0.9787756
## RegionLondon                   -1.0529841 -0.6854412
## RegionNorth East                0.1943394  0.6729634
## RegionNorth West                0.6951400  1.0669092
## RegionSouth East               -1.3556096 -0.9958499
## RegionSouth West               -1.5251980 -1.1352127
## RegionWest Midlands            -0.2830594  0.1065763
## RegionYorkshire and The Humber  0.6133129  1.0125290
## price                           3.4150654  3.9945097
## density                         1.0498798  1.6261246
plot(mod)

Region and price interactions

Plot
mod<-lm(data=ww, response ~Region* price)
plot_model(mod)

Explanation of model fitting

The first model shows differences between regions, ignoring any trend associated with the within region house price for the MSOA. This is equivalent to plotting out the mean deaths per region over the msoas with confidence intervals.

The second model uses logged house prices. However these are centred and scaled to lie between 0 and 1 for each of the regions. An additive model is then built using this scaled relative house prices as input. The interpretation of the coefficient for price is that the effect (slope of the regression line) is uniform across the country after scaling the variability within each region. An unscaled model would be unsuitable as it would pick up on differences in house prices between regions. This approach avoids that artefact and separates regional differences from differences in their mean house prices.

The third model includes an interaction term. This allows any differences for within region trends associated with house prices to be detected. So, if the slope of the line is steeper of flatter in some regions this will show up as an interaction. This is the most complete moel and will tend to have the best statistical properties when diagnostics are investigated. However it the hardest to interpret directly.

Mapped

msoahex %>% right_join(w, by="msoa11cd") ->w_geo
library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")[7:1]
qtm(backhex) +
tm_shape(w_geo) +
  tm_fill("response", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)

First wave against second wave

All cause

msoa_wave %>% mutate(response =1000*(All/over60)) %>% filter(response<100)-> ms
ms %>% pivot_wider(id_cols=c("Region","msoa11cd", "price"),names_from = wave,values_from = response) -> w
g0<-ggplot(w,aes(y=wave1, x=wave2)) + ylab("All cause second wave deaths \n Per thousand people over 60") + xlab("All cause first wave deaths \n Per thousand people over 60")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm") +facet_wrap(~Region)

Covid

msoa_wave %>% mutate(response =1000*(covid/over60)) %>% filter(response<12)-> ms
ms %>% pivot_wider(id_cols=c("Region","msoa11cd", "price"),names_from = wave,values_from = response) -> w
g0<-ggplot(w,aes(x=wave1, y=wave2)) + ylab("Covid second wave deaths \n Per thousand people over 60") + xlab("Covid first wave deaths \n Per thousand people over 60")
g0 + geom_point(size=0.1, aes(col= Region)) + geom_smooth(method="lm") +facet_wrap(~Region)

Model for covid deaths in wave 2

House prices and wave 1 deaths both centred and scaled to region.

scale.center <- function(x){
  (x-mean(x,na.rm=TRUE)) /(max(x,na.rm = TRUE)-min(x,na.rm = TRUE))
}
w %>% group_by(Region) %>% mutate(price=-scale.center(log(price))) %>% mutate(wave1=scale.center(wave1)) -> ww

mod<-lm(data=ww, wave2 ~ wave1+Region+ price)
plot_model(mod)

Underlying differences in life expectancy

https://cy.ons.gov.uk/peoplepopulationandcommunity/healthandsocialcare/healthandlifeexpectancies/datasets/lifeexpectancyleandhealthylifeexpectancyhleatbirthbysexformiddlelayersuperoutputareasmsoasinengland

# library(readxl)
# d <- read_excel("hlereferencetable2_tcm77-417533.xls", 
#                                               sheet = "UTLA_males", skip = 7,  n_max = 152)
# names(d)<-gsub(" ","_",names(d))
# d<-d[,c(2,3,7,10)]
# names(d)<-c("LA","Healthy_life_expectancy","Life_expectancy","Proportion_of_life_healthy")
# healthy<-d
data(healthy)
d<-healthy

Searchable data table

DT::datatable(d, 
                              filter = "top",                         
                              extensions = c('Buttons'), options = list(
                                dom = 'Blfrtip',
                                buttons = c('copy', 'csv', 'excel'), colReorder = TRUE
                              ))

Map

Note that some codes do not coincide, so some Local authorities are missing. The general pattern is shown.

library(RColorBrewer)
colors <- brewer.pal(7, "RdYlGn")
names(lashex)[2]<-"LA"
dd<-merge(lashex,d)
tmap::qtm(backhex) + tm_shape(dd) +
  tm_fill("Life_expectancy", palette = colors, midpoint = NA, style="quantile") + tm_borders(lwd = 0) + tm_legend(legend.outside =TRUE)

Histogram of life expectancy

hist(d$Life_expectancy)

Histogram of healthy life expectancy

hist(d$Healthy_life_expectancy)

Data source for Covid and all cause mortality

The data shown are from the ONS.

https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/deathsduetocovid19bylocalareaanddeprivation/december2020/covidlocalareadeprivationupdate.xlsx