Fitting polynomials

The data

d<-read.csv("/home/aqm/course/data/marineinverts.csv")
DT::datatable(d)

Fitting linear model

Linear model fit

mod<-lm(data=d,richness~grain)

Linear model anova and summary

anova(mod)
## Analysis of Variance Table
## 
## Response: richness
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## grain      1 385.13  385.13  23.113 1.896e-05 ***
## Residuals 43 716.52   16.66                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod)
## 
## Call:
## lm(formula = richness ~ grain, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8386 -2.0383 -0.3526  2.5768 11.6620 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.669264   2.767726   6.745 3.01e-08 ***
## grain       -0.046285   0.009628  -4.808 1.90e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.082 on 43 degrees of freedom
## Multiple R-squared:  0.3496, Adjusted R-squared:  0.3345 
## F-statistic: 23.11 on 1 and 43 DF,  p-value: 1.896e-05

Linear model plot

library(ggplot2)
theme_set(theme_bw())
g0<-ggplot(d,aes(x=grain,y=richness))
g1<-g0+geom_point() + geom_smooth(method="lm") 
g1 + xlab("Mean grain size") + ylab("Species richness")

Reset test

We can check whether a strait line is a good representation of the pattern using the reset test that will have a low p-value if the linear form of the model is not a good fit.

library(lmtest)
resettest(d$richness ~ d$grain)
## 
##  RESET test
## 
## data:  d$richness ~ d$grain
## RESET = 19.074, df1 = 2, df2 = 41, p-value = 1.393e-06

Durbin Watson test

The Durbin Watson test which helps to confirm serial autocorrelation that may be the result of a misformed model will often also be significant when residuals cluster on one side of the line. In this case it was not, but this may be because there were too few data points.

dwtest(d$richness~d$grain)
## 
##  Durbin-Watson test
## 
## data:  d$richness ~ d$grain
## DW = 1.7809, p-value = 0.1902
## alternative hypothesis: true autocorrelation is greater than 0

Diagnostic plot after fitting linear model

library(ggfortify)
theme_set(theme_bw())
autoplot(mod, which = 1)

Fitting quadratic model

Quadratic model fit

mod2<-lm(data=d,richness~grain + I(grain^2))

Quadratic model anova and summary

anova(mod2)
## Analysis of Variance Table
## 
## Response: richness
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## grain       1 385.13  385.13  29.811 2.365e-06 ***
## I(grain^2)  1 173.93  173.93  13.463   0.00068 ***
## Residuals  42 542.59   12.92                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod2)
## 
## Call:
## lm(formula = richness ~ grain + I(grain^2), data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5779 -2.5315  0.2172  2.1013  8.3415 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 53.0133538  9.6721492   5.481 2.21e-06 ***
## grain       -0.2921821  0.0675505  -4.325 9.19e-05 ***
## I(grain^2)   0.0004189  0.0001142   3.669  0.00068 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.594 on 42 degrees of freedom
## Multiple R-squared:  0.5075, Adjusted R-squared:  0.484 
## F-statistic: 21.64 on 2 and 42 DF,  p-value: 3.476e-07

Quadratic model plot

library(ggplot2)
theme_set(theme_bw())
g0<-ggplot(d,aes(x=grain,y=richness))
g1<-g0+geom_point() + geom_smooth(method="lm", formula=y~x+I(x^2), se=TRUE) 
g1 + xlab("Mean grain size") + ylab("Species richness")

Diagnostic plot after fitting quadratic

library(ggfortify)
theme_set(theme_bw())
autoplot(mod2, which = 1)

Comparing fits

anova(mod,mod2)
## Analysis of Variance Table
## 
## Model 1: richness ~ grain
## Model 2: richness ~ grain + I(grain^2)
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)    
## 1     43 716.52                                
## 2     42 542.59  1    173.93 13.463 0.00068 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fitting splines using mgcv

Fiting model

library(mgcv)
mod3<-gam(data=d,richness~s(grain))

Summary model

summary(mod3)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## richness ~ s(grain)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.6889     0.4601   12.36  2.6e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##            edf Ref.df     F  p-value    
## s(grain) 3.615  4.468 15.92 3.25e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.619   Deviance explained = 65.1%
## GCV = 10.616  Scale est. = 9.5269    n = 45

Plotting model

theme_set(theme_bw())
g0<-ggplot(d,aes(x=grain,y=richness))
g1<-g0+geom_point() + stat_smooth(method = "gam", formula = y ~ s(x)) 
g1 + xlab("Mean grain size") + ylab("Species richness")

Non linear model

Rectangular hyperbola of the Michaelis-Menten form

\[C=\frac{sR}{F+R}\]

Where

  • C is resource consumption,
  • R is the amount or density of the resource,
  • s is the asymptotic value and
  • F represents the density of resource at which half the asymptotic consumption is expected to occur. This model is not linear in its parameters.

Fitting model

Starting values need to be provided. Plot the data first to estimate the asymptote

d<-read.csv("/home/aqm/course/data/Hollings.csv")
g0<-ggplot(data=d,aes(x=Resource,y=Consumption)) + geom_point()
g0

Fitting the model

nlmod<-nls(Consumption~s*Resource/(F+Resource),data = d,start = list( F = 20,s=20)) 

Curve fit

g0<-ggplot(data=d,aes(x=Resource,y=Consumption))
g1<-g0+geom_point()
g2<-g1+geom_smooth(method="nls",formula=y~s*x/(F+x),method.args=list(start = c( F = 20,s=20)), se=FALSE)
g2

Curve fit with confidence intervals

This needs the propagate package.

require(propagate)
newdata<-data.frame(Resource=seq(min(d$Resource),max(d$Resource),length=100))
pred_model <- predictNLS(nlmod, newdata=newdata,nsim = 10000)
conf_model <- pred_model$summary

newdata<-data.frame(newdata,conf_model)

g2  + geom_line(data=newdata,aes(x=Resource,y=Prop.2.5.),col="black",lty=2) + geom_line(data=newdata,aes(x=Resource,y=Prop.97.5.),col="black",lty=2)

Holling’s disc equation

The classic version of Hollings disk equation used in the article is written as

\(R=\frac{aD}{1+aDH}\)

Where

  • F = feeding rate (food items)
  • D = food density (food items \(m^{-2}\))
  • a = searching rate (\(m^{2}s^{-1}\))
  • H = handling time (s per food item).

The data

d<-read.csv("/home/aqm/course/data/buntings.csv")
g0<-ggplot(data=d,aes(x=density,y=rate)) + geom_point()
g0

Fitting the model

d<-read.csv("/home/aqm/course/data/buntings.csv")
HDmod<-nls(rate~a*density/(1+a*density*H),data = d,start = list(a =0.001,H=2)) 

Confidence intervals for parameters

confint(HDmod)
##          2.5%       97.5%
## a 0.002593086 0.006694939
## H 1.713495694 1.976978655

Plot with fitted curve

g0<-ggplot(data=d,aes(x=density,y=rate))
g1<-g0+geom_point()
g2<-g1+geom_smooth(method="nls",formula=y~a*x/(1+a*x*H),method.args=list(start = c(a = 0.01,H=2)), se=FALSE)
g2

Plot with confidence intervals

require(propagate)
newdata<-data.frame(density=seq(0,max(d$density),length=100))
pred_model <- predictNLS(HDmod, newdata=newdata,nsim = 10000)
conf_model <- pred_model$summary

newdata<-data.frame(newdata,conf_model)

g3<-g2  + geom_line(data=newdata,aes(x=density,y=Prop.2.5.),col="black",lty=2) + geom_line(data=newdata,aes(x=density,y=Prop.97.5.),col="black",lty=2)
g3

Non linear quantile regression

library(quantreg)
QuantMod90<-nlrq(rate~a*density/(1+a*density*H),data = d,start = list(a =0.001,H=2),tau=0.9)
QuantMod10<-nlrq(rate~a*density/(1+a*density*H),data = d,start = list(a =0.001,H=2),tau=0.1)
newdata$Q90<- predict(QuantMod90, newdata = newdata)
newdata$Q10 <- predict(QuantMod10, newdata = newdata)

g3 + geom_line(data=newdata,aes(x=density,y=Q90),col="red") + geom_line(data=newdata,aes(x=density,y=Q10),col="red")