Working with the reformatted data

library(readxl)
library(dplyr)
library(tidyr)
library(aqm)
library(ggplot2)

## Pull out a vector with the core identifiers

core<- as.vector(t(read_excel("LGI September 2018 Abundance and Biometric Data - reformated.xlsx", 
    sheet = "MF Measurements Sept 2018", 
    col_names = FALSE, skip = 2, n_max = 1)[,-1]))


## Pull out a vector with the type of core identifiers 

type<-as.vector(t(read_excel("LGI September 2018 Abundance and Biometric Data - reformated.xlsx", 
    sheet = "MF Measurements Sept 2018", 
    col_names = FALSE, skip = 3, n_max = 1)[,-1]))


## Read in the data matrix

MF <- read_excel("LGI September 2018 Abundance and Biometric Data - reformated.xlsx", 
    sheet = "MF Measurements Sept 2018", 
    skip = 4)

Sorting out the names

names(MF)<-tolower(names(MF)) ## Change all to lower case
names(MF)<-gsub("[^\\p{L}']+", " ", names(MF), perl = TRUE) ## Strip anything that's not a letter or a space.
names(MF)<- gsub(" $","", names(MF), perl=T) ## Strip trailing spaces
names(MF)<- gsub(" ","_", names(MF), perl=T) ## Underscore for space

## Now paste the core ids onto all the names except the first one

names(MF)[-1]<-paste(names(MF)[-1],type,core,sep="-")

Pivot and separate

This should now produce a proper data frame.

MF %>% pivot_longer(cols=c(-1), values_drop_na=TRUE) %>% separate(name, "-", into = c("Taxon","Type","Core")) %>% separate(size_class, "-",into=c("Lower","Higher")) %>% mutate (Lower=aqm::clean(Lower), Higher=aqm::clean(Higher), Mid_class=round((Lower+Higher)/2, 1)) -> MF

dt(MF)
taxa <- read_excel("London Gateway Invertebrate taxa.xlsx")
names(taxa)[2]<-"Taxon_group"
taxa$Taxon<-tolower(taxa$Taxon)
taxa$Taxon<-gsub("\\(", "", taxa$Taxon)
taxa$Taxon<-gsub("\\)", "", taxa$Taxon)
taxa$Taxon<-gsub(" ", "_", taxa$Taxon)

MF$Taxon<-gsub("cappitellidae","capitellidae",MF$Taxon )
unique(MF$Taxon) %in% taxa$Taxon
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
MF %>% left_join(taxa, by = "Taxon") -> MF
dt(MF)
unique(MF$Taxon_group)
## [1] "Mollusc"   "Worm"      "Crustacea"
MF %>% group_by(Taxon_group,Mid_class) %>% summarise(n=n(), sum=sum(value)) %>% arrange(-sum) -> sm
dt(sm)

Test a conversion function

conversion<-function(tg,x){
  if(tg=="Mollusc") afdm<-exp(-3.8521+2.5525*log(x)+0.5*0.14)
  if(tg=="Worm") afdm<-exp(-4.8+1.88*log(x)+0.5*0.175)
  if(tg=="Crustacea") afdm<-exp(-5.2531+2.6753*log(x)+0.5*0.0787)
  afdm
}
  

test<-data.frame(tg=rep(unique(MF$Taxon_group),each=60),x=rep(1:60,times=3))

test$afdm<-unlist(mapply(conversion, test$tg, test$x))

ggplot(test,aes(x=x,y=afdm,colour=tg)) + geom_line()