library(ggplot2)
library(dplyr)
library(mgcv)
library(DT)
theme_set(theme_bw())
dt<-function(x) DT::datatable(x, 
    filter = "top",                         
    extensions = c('Buttons'), options = list(
    dom = 'Blfrtip',
    buttons = c('copy', 'csv', 'excel'), colReorder = TRUE
  ))

Introduction

This markdown document can be adapted to run a regression analysis on any pair of variables in the Arne data set. You can also use it to run analyses on sub sets of the data.

Loading the data

d<-read.csv("/home/rstudio/webpages/Quantitative_and_Spatial_Analysis/Quantitative_and_Spatial_2018/Arne_project/gis_data/study_points.csv")

Select the variable to analyse

The only factor in your data set is site. You may want to exclude last year’s data

d$year<-as.factor(ifelse(d$site=="last_year",2017,2018))
#d<-subset(d,d$site != "last_year")
#d<-droplevels(d)

d$y<-d$pine_density
d$group<-d$site
xlabel<-"Site"
ylabel <-"Density of pine saplings (n/m²)"

One way ANOVA

The purpose of one way anova is

  1. Test whether there is greater variability between groups than within groups
  2. Quantify any differences found between group means

Grouped boxplots

Exploratory plots

g0<-ggplot(d,aes(x=group,y=y))
g0+geom_boxplot() + xlab("Site") +ylab(ylabel)

Histograms for each factor level

This doesn’t ork very well unless the sample sizes are comporable so it is not run unless you change eval = FALSE to TRUE

g0<-ggplot(d,aes(x=y))
g1<-g0+geom_histogram(color="grey",binwidth = 0.5)

g1+facet_wrap(~group) +xlab(xlabel) 

Confidence interval plot

g0<-ggplot(d,aes(x=group,y=y))
g1<-g0+stat_summary(fun.y=mean,geom="point")
g1<-g1 +stat_summary(fun.data=mean_cl_normal,geom="errorbar")
g1 +xlab(xlabel) + ylab(ylabel)

Fitting ANOVA

mod<-aov(data=d,y~group)
summary(mod)
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## group         2   9.46   4.728   5.359 0.00559 **
## Residuals   160 141.14   0.882                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tukey corrected pairwise comparisons

Use to find where signficant differences lie. This should confirm the pattern shown using the confidence interval plot.

mod<-aov(data=d,y~group)
TukeyHSD(mod)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = y ~ group, data = d)
## 
## $group
##                   diff        lwr         upr     p adj
## B-A         -0.8627524 -1.5580314 -0.16747342 0.0106138
## last_year-A -0.5107364 -0.9544040 -0.06706888 0.0195531
## last_year-B  0.3520160 -0.2576498  0.96168177 0.3612384
plot(TukeyHSD(mod))

Anova with White’s correction

This will give you the overall Anova table if there is heterogeneity of variance.

library(sandwich)
library(car)
mod<-lm(y~group, data=d)
Anova(mod,white.adjust='hc3')
## Analysis of Deviance Table (Type II tests)
## 
## Response: y
##            Df      F  Pr(>F)  
## group       2 4.2391 0.01607 *
## Residuals 160                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1