d<- read_csv("Skylark Flight and Song Data.csv")
# As the IDs are compound have to separate the first letters to find the site
d %>% separate(Flight_ID, into=c("site")) ->d
# Names too long
names(d)<-c("site","Take_off","Landing","Flight_length","Song_length","x","y","Distance")
# One site seems to be mislabled as HH3
d$site<-as.factor(gsub("HH3","HH2",d$site))
write.csv(d,"skylarks.csv", row.names = FALSE)

Song length

d %>% ggplot(aes(x=site,y=Song_length)) -> g0
g0+geom_boxplot()

aqm::ci(g0)

mod<-lm(data=d, Song_length~site)
anova(mod)
## Analysis of Variance Table
## 
## Response: Song_length
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## site        2 1028662  514331  21.786 9.712e-09 ***
## Residuals 114 2691357   23608                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod)
## 
## Call:
## lm(formula = Song_length ~ site, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -373.26  -78.26    0.68   65.68  829.74 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   111.32      30.73   3.623 0.000437 ***
## siteHH2        92.94      36.49   2.547 0.012191 *  
## siteMD        261.94      41.30   6.342 4.72e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 153.7 on 114 degrees of freedom
## Multiple R-squared:  0.2765, Adjusted R-squared:  0.2638 
## F-statistic: 21.79 on 2 and 114 DF,  p-value: 9.712e-09
#performance::check_model(mod)

Pairwise comparisons

To see where the differences lie we can use pairwise comparisons.

summary(glht(mod, linfct = mcp(site = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = Song_length ~ site, data = d)
## 
## Linear Hypotheses:
##                Estimate Std. Error t value Pr(>|t|)    
## HH2 - HH1 == 0    92.94      36.49   2.547   0.0321 *  
## MD - HH1 == 0    261.94      41.30   6.342   <1e-04 ***
## MD - HH2 == 0    169.00      33.89   4.986   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Flight length

d %>% ggplot(aes(x=site,y=Flight_length)) -> g0
g0+geom_boxplot()

aqm::ci(g0)

mod<-lm(data=d, Flight_length~site)
anova(mod)
## Analysis of Variance Table
## 
## Response: Flight_length
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## site        2 1089321  544661  20.024 3.526e-08 ***
## Residuals 114 3100886   27201                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod)
## 
## Call:
## lm(formula = Flight_length ~ site, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -369.19  -86.26   -4.26   58.74 1121.81 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   149.72      32.99   4.539 1.41e-05 ***
## siteHH2        92.54      39.17   2.363   0.0198 *  
## siteMD        268.47      44.33   6.056 1.84e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 164.9 on 114 degrees of freedom
## Multiple R-squared:   0.26,  Adjusted R-squared:  0.247 
## F-statistic: 20.02 on 2 and 114 DF,  p-value: 3.526e-08
#performance::check_model(mod)
summary(glht(mod, linfct = mcp(site = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = Flight_length ~ site, data = d)
## 
## Linear Hypotheses:
##                Estimate Std. Error t value Pr(>|t|)    
## HH2 - HH1 == 0    92.54      39.17   2.363   0.0509 .  
## MD - HH1 == 0    268.47      44.33   6.056   <1e-04 ***
## MD - HH2 == 0    175.93      36.38   4.836   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Distance

d %>% filter(Distance<100) %>% ggplot(aes(x=Flight_length, y=Distance)) -> g0
g0 +geom_point() + geom_smooth(method="lm") + facet_wrap(~site)

Number of flights

This is tricky to analyse as the number of observations for each site is very small. I will try my best to get a result. First aggregate the data and count the number of flights for each combination of day and site.

library(lubridate)
d$day<-yday(as.Date(d$Take_off))

d %>% group_by(site,day) %>% summarise(n=n()) -> dd

Plotting this out looks like this (don’t use the plot in the results, I’m just trying to see the data here)

dd %>% ggplot(aes(x=day,y=n,colour=site)) -> g0
g0 +geom_point()

One way to test for signficance with count data is to use a generalised linear model of the poisson family.

mod<-glm(data=dd,n~site,family=poisson(link="identity"))
anova(mod,test="Chisq")
## Analysis of Deviance Table
## 
## Model: poisson, link: identity
## 
## Response: n
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev Pr(>Chi)  
## NULL                    11     55.885           
## site  2    8.717         9     47.168   0.0128 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

This does show overall signficance, but again the number of observations is small, so this is marginal.

summary(mod)
## 
## Call:
## glm(formula = n ~ site, family = poisson(link = "identity"), 
##     data = dd)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -3.15960  -2.03800  -0.07249   1.23961   2.51847  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    6.250      1.250   5.000 5.73e-07 ***
## siteHH2        5.950      2.001   2.974  0.00294 ** 
## siteMD         4.083      2.238   1.825  0.06802 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 55.885  on 11  degrees of freedom
## Residual deviance: 47.168  on  9  degrees of freedom
## AIC: 99.332
## 
## Number of Fisher Scoring iterations: 3

The difference that is significant is between HH1 and HH2

summary(glht(mod, linfct = mcp(site = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = n ~ site, family = poisson(link = "identity"), 
##     data = dd)
## 
## Linear Hypotheses:
##                Estimate Std. Error z value Pr(>|z|)   
## HH2 - HH1 == 0    5.950      2.001   2.974  0.00828 **
## MD - HH1 == 0     4.083      2.238   1.825  0.16017   
## MD - HH2 == 0    -1.867      2.426  -0.770  0.72041   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)