{r}
############### LIBRARIES NEEDED ###############
library(vegan)
library(psych)
library(dplyr)
library(RColorBrewer)
library(devtools)
library(gridBase)
library(grid)
library(rgl)
library(readxl)
install_github("CropPro-Package/CropPro")
library(CropPro)

############### IMPORT AND PREPARE DATA TABLES ###############
#duplicate Supplementary Tables and save with site/data names
SHAR_data <- read_excel("Dataset_S1.xlsx") 
HEM_data <- read_excel("Dataset_S2.xlsx")
HEM_PPNA <-HEM_data[1:44] #separate out PPNA data from el-Hemmeh data table
HEM_LPPNB <-HEM_data[45:55] 
HEM_LPPNB<-cbind.data.frame(HEM_data[1:9], HEM_LPPNB) #separate out LPPNB data from el-Hemmeh data table and add preceding species/code columns back in
SamplesPeriod <- read_excel("Dataset_S3.xlsx") 
BarGrn_measurements <- read_excel("Dataset_S4.xlsx") 

############ BOTANICAL COMPOSITION OF SAMPLES ############
### SHARARA ###
### group 1 categories ###
SHAR_group1<-aggregate(SHAR_data[10:43], by=list(SHAR_data$"group 1"),  FUN = sum) #group by 'group 1' categories
SHAR_group1_10<-SHAR_group1 [2:35] #select any columns containing sample data
colSums(SHAR_group1_10)
SHAR_group1_10<-SHAR_group1_10[, colSums(SHAR_group1_10)>9.99] #remove samples with <10 items
SHAR_group1_10<-cbind.data.frame(SHAR_group1[1], SHAR_group1_10)
SHAR_group1_10_data_percentage <- apply(SHAR_group1_10[,2:29], 2, function(x){x*100/sum(x)})
row.names(SHAR_group1_10_data_percentage)<-SHAR_group1_10$Group.1 #calculate the % of each category within samples
### re-order rows/columns ###
SHAR_group1_10_data_percentage_ordered<-SHAR_group1_10_data_percentage[c(1,3,2,5,4,6),] #re-order rows (i.e. group 1 categories)
SHAR_group1_10_data_percentage_ordered<-SHAR_group1_10_data_percentage_ordered [, c(18,26,24,7,21,4,11,14,5,15,16,17,12,23,27,13,6,28,19,22,10,9,25,1,2,3,8)] #re-order columns (i.e. samples)
### create bar graph ###
pdf('SHAR_BarGroup1.pdf') #export plot into PDF directly
col<-c("#0072B2","#009E73","#999999","#F0E442","#D55E00","white") #assign colours
par(mar=c(4,4,4,9))
barplot(SHAR_group1_10_data_percentage_ordered, col=col, xlab="Sample No", las=2,cex.names = 0.45,
        legend.text = rownames(SHAR_group1_10_data_percentage_ordered) ,
        args.legend = list(x = 42, y=50, cex=0.75)) #create bar graph
dev.off() #finished command to export plot to pdf

### group 2 categories ###
SHAR_group2<-aggregate(SHAR_data[10:43], by=list(SHAR_data$"group 2"),  FUN = sum) #group by 'group 2' categories
SHAR_group2_10<-SHAR_group2 [2:35] #select any columns containing sample data
colSums(SHAR_group2_10)
SHAR_group2_10<-SHAR_group2_10[, colSums(SHAR_group2_10)>9.99] #remove samples with <10 items
SHAR_group2_10<-cbind.data.frame(SHAR_group2[1], SHAR_group2_10)
SHAR_group2_10_data_percentage <- apply(SHAR_group2_10[,2:29], 2, function(x){x*100/sum(x)})
row.names(SHAR_group2_10_data_percentage)<-SHAR_group2_10$Group.1 #calculate the % of each category within samples
### re-order rows/columns ###
SHAR_group2_10_data_percentage_ordered<-SHAR_group2_10_data_percentage[c(1,2,4,3,7,6,5,8),] #re-order rows (i.e. group 2 categories)
SHAR_group2_10_data_percentage_ordered<-SHAR_group2_10_data_percentage_ordered [, c(18,26,24,7,21,4,11,14,5,15,16,17,12,23,27,13,6,28,19,22,10,9,25,1,2,3,8)] #re-order columns (i.e. samples)
### create bar chart ###
pdf('SHAR_BarGroup2.pdf') #export plot into PDF directly
col<-c("#0072B2","#56B4E9","green","#999999","#F0E442","#D55E00","#CC79A7","white") #assign colours
par(mar=c(4,4,4,9))
barplot(SHAR_group2_10_data_percentage_ordered, col=col, xlab="Sample No", las=2,cex.names = 0.45,
        legend.text = rownames(SHAR_group2_10_data_percentage_ordered) ,
        args.legend = list(x =47, y=50, cex=0.75)) #create bar graph
dev.off() #finished command to export plot to pdf

### PPNA HEMMEH ###
### group 1 categories ###
PPNA_group1<-aggregate(HEM_PPNA[10:44], by=list(HEM_data$"group 1"),  FUN = sum) #group by 'group 1' categories
PPNA_group1_data_percentage <- apply(PPNA_group1[,2:36], 2, function(x){x*100/sum(x)})
row.names(PPNA_group1_data_percentage)<-PPNA_group1$Group.1 #calculate the % of each catergory within samples
### re-order rows/columns ###
PPNA_group1_data_percentage_ordered<-PPNA_group1_data_percentage[c(1,3,2,5,4,6),] #re-order rows (i.e. group 1 categories)
PPNA_group1_data_percentage_ordered<-PPNA_group1_data_percentage_ordered [, c(24,27,21,26,23,20,5,16,2,19,22,1,14,9,31,17,28,8,11,10,4,29,34,18,30,15,12,35,33,3,7,32,6,25,13)] #re-order rows (i.e. samples)
### create bar graph ###
pdf('HEM_PPNA_BarGroup1.pdf') #export plot into PDF directly
col<-c("#0072B2","#009E73","#999999","#F0E442","#D55E00","white") #assign colours
par(mar=c(4,4,4,9))
barplot(PPNA_group1_data_percentage_ordered, col=col, xlab="Sample No", las=2,cex.names = 0.45,
        legend.text = rownames(PPNA_group1_data_percentage_ordered) ,
        args.legend = list(x = 60, y=70, cex=0.75)) #create bar graph
dev.off() #finished command to export plot to pdf

### group 2 categories ###
PPNA_group2<-aggregate(HEM_PPNA[10:44], by=list(HEM_data$"group 2"),  FUN = sum) #group by 'group 2' categories
PPNA_group2_data_percentage <- apply(PPNA_group2[,2:36], 2, function(x){x*100/sum(x)})
row.names(PPNA_group2_data_percentage)<-PPNA_group2$Group.1 #calculate the % of each catergory within samples
### re-order rows/columns ###
PPNA_group2_data_percentage_ordered<-PPNA_group2_data_percentage[c(1,2,4,5,3,8,7,6,9),] #re-order rows (i.e. group 2 categories)
PPNA_group2_data_percentage_ordered<-PPNA_group2_data_percentage_ordered [, c(24,27,21,26,23,20,5,16,2,19,22,1,14,9,31,17,28,8,11,10,4,29,34,18,30,15,12,35,33,3,7,32,6,25,13)] #re-order rows (i.e. samples)
### create bar chart ###
pdf('HEM_PPNA_BarGroup2.pdf') #export plot into PDF directly
col<-c("#0072B2","#56B4E9","green","#009E73","#999999","#F0E442","#D55E00","#CC79A7","white") #assign colours
par(mar=c(4,4,4,9))
barplot(PPNA_group2_data_percentage_ordered, col=col, xlab="Sample No", las=2,cex.names = 0.45,
        legend.text = rownames(PPNA_group2_data_percentage_ordered) ,
        args.legend = list(x =61, y=70, cex=0.75)) #create bar graph
dev.off() #finished command to export plot to pdf

### LPPNB HEMMEH ###
### group 1 categories ###
LPPNB_group1<-aggregate(HEM_LPPNB[10:20], by=list(HEM_data$"group 1"),  FUN = sum) #group by 'group 1' categories
LPPNB_group1_data_percentage <- apply(LPPNB_group1[,2:12], 2, function(x){x*100/sum(x)})
row.names(LPPNB_group1_data_percentage)<-LPPNB_group1$Group.1 #calculate the % of each category within samples
### re-order rows/columns ###
LPPNB_group1_data_percentage_ordered<-LPPNB_group1_data_percentage[c(3,1,2,5,4,6),] #re-order rows (i.e. group 1 categories)
LPPNB_group1_data_percentage_ordered<-LPPNB_group1_data_percentage_ordered [, c(9,6:7,5,8,4,3,1:2,10:11)] #re-order rows (i.e. samples)
### create bar graph ###
pdf('HEM_LPPNB_BarGroup1.pdf') #export plot into PDF directly
col<-c("#009E73","#0072B2","#999999","#F0E442","#D55E00","white") #assign colours
par(mar=c(4,4,4,9))
barplot(LPPNB_group1_data_percentage_ordered, col=col, ylab="%", xlab="Sample No", las=2,cex.names = 0.45,
        legend.text = rownames(LPPNB_group1_data_percentage_ordered),
        args.legend = list(x = 17, y=70, cex=0.75)) #create bar graph
dev.off() #finished command to export plot to pdf

### group 2 categories ###
LPPNB_group2<-aggregate(HEM_LPPNB[10:20], by=list(HEM_data$"group 2"),  FUN = sum) #group by 'group 2' categories
LPPNB_group2_data_percentage <- apply(LPPNB_group2[,2:12], 2, function(x){x*100/sum(x)})
row.names(LPPNB_group2_data_percentage)<-LPPNB_group2$Group.1 #calculate the % of each category within samples
### re-order rows/columns ###
LPPNB_group2_data_percentage_ordered<-LPPNB_group2_data_percentage[c(4,5,1,2,3,8,7,6,9),] #re-order rows (i.e. group 2 categories)
LPPNB_group2_data_percentage_ordered<-LPPNB_group2_data_percentage_ordered [, c(9,6:7,5,8,4,3,1:2,10:11)] #re-order rows (i.e. samples)
### create bar chart ###
pdf('HEM_LPPNB_BarGroup2.pdf') #export plot into PDF directly
col<-c("green","#009E73","#0072B2","#56B4E9","#999999","#F0E442","#D55E00","#CC79A7","white") #assign colours
par(mar=c(4,4,4,9))
barplot(LPPNB_group2_data_percentage_ordered, col=col, ylab="%", xlab="Sample No", las=2,cex.names = 0.45,
        legend.text = rownames(LPPNB_group2_data_percentage_ordered),
        args.legend = list(x = 19.2, y=70, cex=0.75)) #create bar graph
dev.off() #finished command to export plot to pdf


############ CORRESPONDENCE ANALYSIS PPNA & LPPNB HEMMEH ############
HEM_corro<-aggregate(HEM_data[10:55], by=list(HEM_data$"corro code"),  FUN = sum) #amalgamate taxa by corro code
HEM_extra<-HEM_corro
HEM_extra$count<-apply(HEM_corro[2:47],1,function(x) sum(x!=0)) #calculate count for corro code
HEM_corro = HEM_extra[HEM_extra$count > 5, ] #delete taxa which occur in less than 5 (10%) of samples
HEM_CA <-HEM_corro[2:47] #select any columns containing sample data
colSums(HEM_CA)
HEM_CA<-HEM_CA[, colSums(HEM_CA)>29.99] #remove samples with <30 items
labels<-HEM_corro$`Group.1` #labels of the rows
HEM_CA<-as.data.frame(t(HEM_CA)) #transpose the data
colnames(HEM_CA)<-labels #relabel the rows
### run the correspondence analysis ###
test<-cca(HEM_CA)

### generic biplot ###
pdf('HEM_CA_Biplot.pdf') #export plot into PDF directly
ordiplot(test)
dev.off() #finished command to export plot to pdf

### sample and species CA plots ###
scores<-scores(test, choices = c(1,2,3)) #extract the raw results
species<-data.frame(scores$species) #create a dataframe with CA scores for species
specieslabels<-row.names(species)
species<-cbind(specieslabels,species) #create a dataframe with CA scores for samples
samples<-data.frame(scores$sites)
samplelabels<-row.names(samples)
samples<-cbind(samplelabels,samples)
### plot species and modify plot - eg labels, colour etc ###
pdf('HEM_CA_Species.pdf') #export plot into PDF directly
plot(species$CA1,species$CA2, col="red", pch = 20, xlab="CA1", ylab="CA2")
text(species$CA1,species$CA2-0.06, labels = species$specieslabels, cex=0.5)
abline(h=0, lty=3)
abline(v=0, lty=3)
dev.off() #finished command to export plot to pdf

### plot samples and modify plot - eg labels, colour etc ###
pdf('HEM_CA_Samples.pdf') #export plot into PDF directly 
plot(samples$CA1,samples$CA2, xlab="CA1", ylab="CA2", pch=16)
text(samples$CA1,samples$CA2-0.15, labels = samples$samplelabels, cex=0.5)
abline(h=0, lty=3)
abline(v=0, lty=3)
dev.off() #finished command to export plot to pdf

### CA sample graph CODED ###
#samples coded by TIME PERIOD
samplesmerged<-merge.data.frame(samples,SamplesPeriod, by.x='samplelabels', by.y="sample")
samplesmerged<-samplesmerged %>% mutate(pchPERIOD=
                                          case_when(period== "PPNA"~1,
                                                    period== "LPPNB"~2
                                          ))
par(xpd=FALSE)
par(mar=c(4,4,4,4))
pdf('HEM_CA_SamplesPeriod.pdf') #export plot into PDF directly
plot(samplesmerged$CA1, samplesmerged$CA2, pch=as.numeric(as.character(samplesmerged$pchPERIOD)), col="black", xlab="CA1", ylab="CA2")
abline(h=0)
abline(v=0)
par(xpd=TRUE)
uni.pch<-unique(samplesmerged$pchPERIOD)
uni.PERIOD<-unique(samplesmerged$period)
legend("topright",inset=c(0,0), legend=uni.PERIOD, pch = as.numeric(as.character(uni.pch)))
dev.off() #finished command to export plot to pdf


############ ANALYSIS OF CHAFF MORPHOLOGY ############
### SHAR chaff categories ###
SHAR_chaff<-aggregate(SHAR_data [10:43], by=list(SHAR_data$"chaff code"),  FUN = sum) #group by 'chaff code' categories
### calculate sum of chaff code categories across all Sharara samples ####
SHAR_chaff_totals<-SHAR_chaff
SHAR_chaff_totals$sum<-rowSums(SHAR_chaff[2:35])
SHAR_chaff_totals <-SHAR_chaff_totals[36] 
SHAR_chaff_totals<-cbind.data.frame(SHAR_chaff[1], SHAR_chaff_totals) #separate out sums column and group 3 column
names(SHAR_chaff_totals )[names(SHAR_chaff_totals ) == "sum"] <- "SHAR" #rename sums column as SHAR

### HEM PPNA chaff categories ###
HEM_PPNA_chaff<-aggregate(HEM_PPNA [10:44], by=list(HEM_PPNA$"chaff code"),  FUN = sum) #group by 'chaff code' categories
### calculate sum of chaff categories across all HEM PPNA samples ####
HEM_PPNA_chaff_totals<-HEM_PPNA_chaff
HEM_PPNA_chaff_totals$sum<-rowSums(HEM_PPNA_chaff[2:36])
HEM_PPNA_chaff_totals <-HEM_PPNA_chaff_totals[37] 
HEM_PPNA_chaff_totals<-cbind.data.frame(HEM_PPNA_chaff[1], HEM_PPNA_chaff_totals)
names(HEM_PPNA_chaff_totals )[names(HEM_PPNA_chaff_totals ) == "sum"] <- "HEM_PPNA" #rename sums column as HEM PPNA

### HEM LPPNB chaff categories ###
HEM_LPPNB_chaff<-aggregate(HEM_LPPNB [10:20], by=list(HEM_LPPNB$"chaff code"),  FUN = sum) #group by 'chaff code' categories
### calculate sum of chaff categories across all HEM LPPNB samples ####
HEM_LPPNB_chaff_totals<-HEM_LPPNB_chaff
HEM_LPPNB_chaff_totals$sum<-rowSums(HEM_LPPNB_chaff[2:12])
HEM_LPPNB_chaff_totals <-HEM_LPPNB_chaff_totals[13] 
HEM_LPPNB_chaff_totals<-cbind.data.frame(HEM_LPPNB_chaff[1], HEM_LPPNB_chaff_totals) #separate out sums column and chaff column 
names(HEM_LPPNB_chaff_totals )[names(HEM_LPPNB_chaff_totals ) == "sum"] <- "HEM_LPPNB" #rename sums column as HEM LPPNB

### Merge chaff total tables for HEM PPNA, HEM LPPNB and SHAR ###
ChaffHEM <- merge(HEM_PPNA_chaff_totals, HEM_LPPNB_chaff_totals, by = "Group.1", all.x=TRUE, all.y=TRUE)
Chaff <- merge (SHAR_chaff_totals,ChaffHEM, by = "Group.1", all.x=TRUE, all.y=TRUE) 
Chaff [2:4][is.na(Chaff[2:4])]<-0 #change any na cells (within the columns which are numeric)

### Separate out Barley rachis and calculate relative percentages ###
BarRa<-Chaff[1:3,]
BarRa_data_percentage <- apply(BarRa[,2:4], 2, function(x){x*100/sum(x)})
row.names(BarRa_data_percentage)<-BarRa$Group.1 #calculate the % of each category within sample

### Separate our emmer glb and calculate relative percentages ###
GlwGb<-Chaff[4:6,]
GlwGb_data_percentage <- apply(GlwGb[,2:4], 2, function(x){x*100/sum(x)})
row.names(GlwGb_data_percentage)<-GlwGb$Group.1 #calculate the % of each category within sample

BarRa_data_percentage<-BarRa_data_percentage[c(3,2,1),] #re-order rows (i.e. rachis categories)
### create bar graph ###
pdf('Bar rachis site comparison.pdf') #export plot into PDF directly
col<-c("#E69F00","#0072B2","lightgrey") #assign colours
par(mar=c(4,4,4,9))
barplot(BarRa_data_percentage, col=col, ylab="%", xlab="Site/period", las=2,cex.names = 0.45,
        legend.text = rownames(BarRa_data_percentage),
        args.legend = list(x = 5, y=70, cex=0.5)) #create bar graph
dev.off() #finished command to export plot to pdf

### create bar graph ###
pdf('Emmer glume base site comparison.pdf') #export plot into PDF directly
col<-c("#E69F00","#0072B2","lightgrey") #assign colours
par(mar=c(4,4,4,9))
barplot(GlwGb_data_percentage, col=col, ylab="%", xlab="Site/period", las=2,cex.names = 0.45,
        legend.text = rownames(GlwGb_data_percentage),
        args.legend = list(x = 5, y=70, cex=0.5)) #create bar graph
dev.off() #finished command to export plot to pdf

### Redo cereal chaff categories after HEM_1143 removal ###
HEM_PPNA_chaffb<- subset(HEM_PPNA_chaff, select = -c(HEM_1143) ) #remove sample HEM_1143
### calculate sum of chaff categories across all samples ####
HEM_PPNA_chaffb_totals<-HEM_PPNA_chaffb
HEM_PPNA_chaffb_totals$sum<-rowSums(HEM_PPNA_chaffb[2:35])
HEM_PPNA_chaffb_totals <-HEM_PPNA_chaffb_totals[36] 
HEM_PPNA_chaffb_totals<-cbind.data.frame(HEM_PPNA_chaffb[1], HEM_PPNA_chaffb_totals)
names(HEM_PPNA_chaffb_totals )[names(HEM_PPNA_chaffb_totals ) == "sum"] <- "HEM_PPNAb" #rename sums column as HEM PPNA

### Merge chaff total tables ###
ChaffHEMb <- merge(HEM_PPNA_chaffb_totals, HEM_LPPNB_chaff_totals, by = "Group.1", all.x=TRUE, all.y=TRUE)
Chaffb <- merge (SHAR_chaff_totals,ChaffHEMb, by = "Group.1", all.x=TRUE, all.y=TRUE)
Chaffb [2:4][is.na(Chaffb[2:4])]<-0 #change any na cells (within the columns which are numeric)

### Separate our Barley rachis and calculate relative percentages ###
BarRab<-Chaffb[1:3,]
BarRab_data_percentage <- apply(BarRab[,2:4], 2, function(x){x*100/sum(x)})
row.names(BarRab_data_percentage)<-BarRab$Group.1 #calculate the % of each catergory within sample
BarRab_data_percentage<-BarRab_data_percentage[c(3,2,1),] #re-order rows (i.e. rachis categories)

### create bar graph ###
pdf('Bar rachis site comparison -HEM_1143.pdf') #export plot into PDF directly
col<-c("#E69F00","#0072B2","lightgrey") #assign colours
par(mar=c(4,4,4,9))
barplot(BarRab_data_percentage, col=col, ylab="%", xlab="Site/period", las=2,cex.names = 0.45,
        legend.text = rownames(BarRab_data_percentage),
        args.legend = list(x = 5, y=70, cex=0.5)) #create bar graph
dev.off() #finished command to export plot to pdf

### Separate our emmer glb and calculate relative percentages ###
GlwGbb<-Chaffb[4:6,]
GlwGbb_data_percentage <- apply(GlwGbb[,2:4], 2, function(x){x*100/sum(x)})
row.names(GlwGbb_data_percentage)<-GlwGbb$Group.1 #calculate the % of each catergory within sample

### create bar graph ###
pdf('Emmer glume base site comparison -HEM_1143.pdf') #export plot into PDF directly
col<-c("#E69F00","#0072B2","lightgrey") #assign colours
par(mar=c(4,4,4,9))
barplot(GlwGbb_data_percentage, col=col, ylab="%", xlab="Site/period", las=2,cex.names = 0.45,
        legend.text = rownames(GlwGbb_data_percentage),
        args.legend = list(x = 5, y=70, cex=0.5)) #create bar graph
dev.off() #finished command to export plot to pdf

############ METRIC ANALYSIS - BARLEY GRAINS ############
### SHARARA ### 
### copy imported table with barley grain measurements ###
SHAR_BarGrn_size <- BarGrn_measurements[BarGrn_measurements$Site == 'SHAR',] 
### define data for x and y values of scatterplot ###
x <- SHAR_BarGrn_size$`Breadth (mm)`
y <- SHAR_BarGrn_size$`Thickness (mm)`

### Make side by side scatterplots categorised according to White and Colledege ###
pdf('SHAR_BarGrn_SizeCategories.pdf') #export plot into PDF directly
par(mfrow=c(2,2))
#plot coded according to White size classifications
plot(x, y,
     xlim = c(0.0, 4),
     ylim = c(0.0, 3),
     xlab = "Breadth (mm)", ylab = "Thickness (mm)",
     cex = 0.5,
     cex.axis = 0.7,
     cex.lab = 0.7,
     col = c("#0072B2", "#999999","#E69F00")[as.factor(SHAR_BarGrn_size$White)], 
     pch=c(19), frame = FALSE)
legend(0, 3, legend=c("Domestic-sized","Intermediate-sized","Wild-sized"), cex=0.7, fill=c("#0072B2", "#999999","#E69F00"), title="White, n=67")
#plot coded according to Colledge size classifications
plot(x, y,
     xlim = c(0.0, 4),
     ylim = c(0.0, 3),
     xlab = "Breadth (mm)", ylab = "Thickness (mm)",
     cex = 0.5,
     cex.axis = 0.7,
     cex.lab = 0.7,
     col = c("#0072B2", "#999999","#E69F00")[as.factor(SHAR_BarGrn_size$Colledge)], 
     pch=c(19), frame = FALSE)
legend(0, 3, legend=c("Domestic-sized","Intermediate-sized","Wild-sized"), cex=0.7,fill=c("#0072B2", "#999999","#E69F00"), title="Colledge, n=67")
dev.off() #finished command to export plot to pdf

### PPNA HEMMEH ### 
### Prepare data ### 
HEM_PPNA_BarGrn_size <- BarGrn_measurements[BarGrn_measurements$Site == 'HEMMEH_PPNA',] 
### define data for x and y values of scatterplot ###
x <- HEM_PPNA_BarGrn_size$`Breadth (mm)`
y <- HEM_PPNA_BarGrn_size$`Thickness (mm)`
## Make side by side scatterplots categorised according to White and Colledege ###
pdf('HEM_PPNA_BarGrn_SizeCategories.pdf') #export plot into PDF directly
par(mfrow=c(2,2))
#plot coded according to White size classifications
plot(x, y,
     xlim = c(0.0, 4),
     ylim = c(0.0, 3),
     xlab = "Breadth (mm)", ylab = "Thickness (mm)",
     cex = 0.5,
     cex.axis = 0.7,
     cex.lab = 0.7,
     col = c("#0072B2", "#999999","#E69F00")[as.factor(HEM_PPNA_BarGrn_size$White)], 
     pch=c(19), frame = FALSE)
legend(0, 3, legend=c("Domestic-sized","Intermediate-sized","Wild-sized"), cex=0.7, fill=c("#0072B2", "#999999","#E69F00"), title="White, n=141")
#plot coded according to Colledge size classifications
plot(x, y,
     xlim = c(0.0, 4),
     ylim = c(0.0, 3),
     xlab = "Breadth (mm)", ylab = "Thickness (mm)",
     cex = 0.5,
     cex.axis = 0.7,
     cex.lab = 0.7,
     col = c("#0072B2", "#999999","#E69F00")[as.factor(HEM_PPNA_BarGrn_size$Colledge)], 
     pch=c(19), frame = FALSE)
legend(0, 3, legend=c("Domestic-sized","Intermediate-sized","Wild-sized"), cex=0.7,fill=c("#0072B2", "#999999","#E69F00"), title="Colledge, n=141")
dev.off() #finished command to export plot to pdf

### LPPNB HEMMEH ### 
### Prepare data ### 
HEM_LPPNB_BarGrn_size <- BarGrn_measurements[BarGrn_measurements$Site == 'HEMMEH_LPPNB',] 
### define data for x and y values of scatterplot ###
x <- HEM_LPPNB_BarGrn_size$`Breadth (mm)`
y <- HEM_LPPNB_BarGrn_size$`Thickness (mm)`
### Make side by side scatterplots categorised according to White and Colledege ###
pdf('HEM_LPPNB_BarGrn_SizeCategories.pdf') #export plot into PDF directly
par(mfrow=c(2,2))
#plot coded according to White size classifications
plot(x, y,
     xlim = c(0.0, 4),
     ylim = c(0.0, 3),
     xlab = "Breadth (mm)", ylab = "Thickness (mm)",
     cex = 0.5,
     cex.axis = 0.7,
     cex.lab = 0.7,
     col = c("#0072B2", "#999999","#E69F00")[as.factor(HEM_LPPNB_BarGrn_size$White)], 
     pch=c(19), frame = FALSE)
legend(0, 3, legend=c("Domestic-sized","Intermediate-sized","Wild-sized"), cex=0.7, fill=c("#0072B2", "#999999","#E69F00"), title="White, n=20")
#plot coded according to Colledge size classifications
plot(x, y,
     xlim = c(0.0, 4),
     ylim = c(0.0, 3),
     xlab = "Breadth (mm)", ylab = "Thickness (mm)",
     cex = 0.5,
     cex.axis = 0.7,
     cex.lab = 0.7,
     col = c("#0072B2", "#999999","#E69F00")[as.factor(HEM_LPPNB_BarGrn_size$Colledge)], 
     pch=c(19), frame = FALSE)
legend(0, 3, legend=c("Domestic-sized","Intermediate-sized","Wild-sized"), cex=0.7,fill=c("#0072B2", "#999999","#E69F00"), title="Colledge, n=20")
dev.off() #finished command to export plot to pdf

### ALL SITES ### NOT IN FINAL MANUSCRIPT ###
### Prepare data ### 
All_BarGrn_size <- BarGrn_measurements
### define data for x and y values of scatterplot ###
x <- All_BarGrn_size$`Breadth (mm)`
y <- All_BarGrn_size$`Thickness (mm)`
### Make large scatterplot categorised according to White ###
pdf('ALL_BarGrn_SizeCategories_White.pdf') #export plot into PDF directly)
#plot coded according to White size classifications
plot(x, y,
     xlim = c(0.0, 4),
     ylim = c(0.0, 3),
     xlab = "Breadth (mm)", ylab = "Thickness (mm)",
     cex = 0.9,
     cex.axis = 0.7,
     cex.lab = 0.7,
     col = c("black","#0072B2","#999999","#E69F00")[as.factor(All_BarGrn_size$White)], 
     pch = c(0,15,1,16,2,17)[as.factor(All_BarGrn_size$Site)], 
     frame = FALSE)
legend(0, 3, legend=c("Domestic-sized","Intermediate-sized","Wild-sized"), cex=0.7, fill=c("black","#0072B2","#999999","#E69F00"), title="White, n=228")
legend(0.75, 3, legend=c("Sharara","Hemmeh PPNA","Hemmeh LPPNB"), cex=0.7, pch=c (0,1,2), title="Sites")
dev.off() #finished command to export plot to pdf

###### copy imported table with barley grain measurements ######
All_BarGrn_size <- BarGrn_measurements
### define data for x and y values of scatterplot ###
x <- All_BarGrn_size$`Breadth (mm)`
y <- All_BarGrn_size$`Thickness (mm)`
### Make large scatterplot categorised according to Colledge ###
pdf('ALL_BarGrn_SizeCategories_Colledge.pdf') #export plot into PDF directly)
#plot coded according to Colledge size classifications
plot(x, y,
     xlim = c(0.0, 4),
     ylim = c(0.0, 3),
     xlab = "Breadth (mm)", ylab = "Thickness (mm)",
     cex = 0.9,
     cex.axis = 0.7,
     cex.lab = 0.7,
     col = c("black","#0072B2","#999999","#E69F00")[as.factor(All_BarGrn_size$Colledge)], 
     pch = c(0,15,1,16,2,17)[as.factor(All_BarGrn_size$Site)], 
     frame = FALSE)
legend(0, 3, legend=c("Domestic-sized","Intermediate-sized","Wild-sized"), cex=0.7, fill=c("black","#0072B2","#999999","#E69F00"), title="Colledge, n=228")
legend(0.75, 3, legend=c("Sharara","Hemmeh PPNA","Hemmeh LPPNB"), cex=0.7, pch=c (0,1,2), title="Sites")
dev.off() #finished command to export plot to pdf

### PLOT OF MEAN AND STANDARD DEVIATIONS OF THREE ASSEMBLAGES ### 
#summary data
Population <- c("Sharara","Hemmeh PPNA","Hemmeh LPPNB")
mean_breadth   <- c(1.91, 2.40, 2.47)
sd_breadth     <- c(0.42, 0.41, 0.53)
mean_thickness <- c(1.25, 1.64, 1.77)
sd_thickness   <- c(0.35, 0.37, 0.52)
#plot graph
# Colors / symbols (optional)
cols <- c("black", "black","black")
pch_vals <- 21:23

pdf('ALL_BarGrn_AvgSD.pdf') #export plot into PDF directly)
plot(NA, NA, 
     xlim = c(0.0, 4),
     ylim = c(0.0, 3),
     xlab = "Breadth (mean ± SD)",
     ylab = "Thickness (mean ± SD)",
     main = "Average Breadth × Thickness with ±1 SD Error Bars")

# Add points and error bars for each population
for (i in 1:length(Population)) {
  x <- mean_breadth[i]
  y <- mean_thickness[i]
  sx <- sd_breadth[i]
  sy <- sd_thickness[i]
# Horizontal error bar (breadth)
  arrows(x - sx, y, x + sx, y,
         angle = 90, code = 3, length = 0.05, col = cols[i], lwd = 2)
# Vertical error bar (thickness)
  arrows(x, y - sy, x, y + sy,
         angle = 90, code = 3, length = 0.05, col = cols[i], lwd = 2)
# Mean point
  points(x, y, pch = pch_vals[i], bg = cols[i], cex = 1.6)
}
# Legend
legend("topleft", legend = Population, pch = pch_vals, pt.bg = cols, bty = "n")
dev.off() #finished command to export plot to pdf




############ CROP PROCESSING ANALYSIS ############
### SHARARA ###
### Triplot ###
### calculate Barley:GLW:Cereal percentages to see which samples make the threshold [100] ###
SHAR_cereal <- SHAR_group1 #use table for samples after grouped by 'group 1' categories
SHAR_cereal <- SHAR_cereal[c(1,3,2,5,4,6),] #organise data so barley and glume wheat first two rows
SHAR_cereal <- head(SHAR_cereal,3) #delete all but first three rows (i.e. barley, glume wheat, cereal indet)
SHAR_cereal_100<-SHAR_cereal [2:35] #select any columns containing sample data
colSums(SHAR_cereal_100)
SHAR_cereal_100<-SHAR_cereal_100[, colSums(SHAR_cereal_100)>99.99] #remove samples with <100 items
SHAR_cereal_100<-cbind.data.frame(SHAR_cereal[1], SHAR_cereal_100) #add row labels back in
SHAR_cereal_data_percentage_100 <- apply(SHAR_cereal_100[,2:3], 2, function(x){x*100/sum(x)})
row.names(SHAR_cereal_data_percentage_100)<-SHAR_cereal_100$Group.1 #calculate the % of barley vs. glume wheat
#no sample(s) with less than 80% barley to delete
### organise data for triplot (grain, chaff, weeds) ###
SHAR_triplot_100<- aggregate(SHAR_data[10:43], by=list(SHAR_data$"triplot"),  FUN = sum) #group by 'triplot' categories
SHAR_triplot_100<- subset(SHAR_triplot_100, select = c(SHAR_003,SHAR_069) ) #keep only samples that meet criteria
SHAR_triplot_100_trans <- as.data.frame(t(SHAR_triplot_100)) 
colnames(SHAR_triplot_100_trans) <- c("grain", "rachis", "weeds") #transpose data and add in column names
### plot triplot 100 threshold ###
dev.new(width = 10.32, height=5.7, unit="in", noRStudioGD = TRUE)
crop.triplot(grain=SHAR_triplot_100_trans$grain, rachis=SHAR_triplot_100_trans$rachis, weeds=SHAR_triplot_100_trans$weeds) #make triplot 
dev.copy2pdf(file="SHAR_Triplot_100.pdf", encoding="WinAnsi")

### calculate Barley:GLW:Cereal percentages to see which samples make the threshold [30] ###
SHAR_cereal_30<-SHAR_cereal [2:35] #select any columns containing sample data
colSums(SHAR_cereal_30)
SHAR_cereal_30<-SHAR_cereal_30[, colSums(SHAR_cereal_30)>29.99] #remove samples with <30 items
SHAR_cereal_30<-cbind.data.frame(SHAR_cereal[1], SHAR_cereal_30) #add row labels back in
SHAR_cereal_data_percentage_30 <- apply(SHAR_cereal_30[,2:4], 2, function(x){x*100/sum(x)})
row.names(SHAR_cereal_data_percentage_30)<-SHAR_cereal_30$Group.1 #calculate the % of barley vs. glume wheat
#no sample(s) with less than 80% barley to delete
### organise data for triplot (grain, chaff, weeds) ###
SHAR_triplot_30<- aggregate(SHAR_data[10:43], by=list(SHAR_data$"triplot"),  FUN = sum) #group by 'triplot' categories
SHAR_triplot_30<- subset(SHAR_triplot_30, select = c(SHAR_003,SHAR_069,SHAR_077) ) #keep only samples that meet criteria
SHAR_triplot_30_trans <- as.data.frame(t(SHAR_triplot_30)) 
colnames(SHAR_triplot_30_trans) <- c("grain", "rachis", "weeds") #transpose data and add in column names
### plot triplot 30 threshold ###
dev.new(width = 10.32, height=5.7, unit="in", noRStudioGD = TRUE)
crop.triplot(grain=SHAR_triplot_30_trans$grain, rachis=SHAR_triplot_30_trans$rachis, weeds=SHAR_triplot_30_trans$weeds) #make triplot and label sample SHAR_Sp5 (or change to another sample by changing number and label)
dev.copy2pdf(file="SHAR_Triplot_30.pdf", encoding="WinAnsi")

### Weed-based DA ###
### Create a dataset for crop processing - just species,crop pro codes and samples ###
SHAR_croppro<-SHAR_data[c(2,9,10:43)]
SHAR_croppro<- subset(SHAR_croppro, select = c(1,2,SHAR_003,SHAR_069,SHAR_077)) ##select only samples that met above threshold for inclusion (>30 cereal items, of which 80% or more was barley)
SHAR_croppro<-SHAR_croppro[complete.cases(SHAR_croppro),] #remove any rows which don't have a croppro code
SHAR_croppro10<-SHAR_croppro[3:5][, colSums(SHAR_croppro[3:5])>9.9]
SHAR_croppro10<-cbind.data.frame(SHAR_croppro[1:2],SHAR_croppro10) #remove samples with <10 weed items
### transform data using crop.dataorg function ###
crop.org<-crop.dataorg(SHAR_croppro10, codes=2, samples=3)
### run LDA and plot data ###
LDAresult_crop<-LDAcrop.pro(crop.org)
pdf('SHAR_LDAresult.pdf') #export plot into PDF directly
crop.plot2D(LDAresult_crop, site = "SHAR", col="red")
dev.off() #finished command to export plot to pdf

### PPNA HEMMEH ###
### Triplot ###
### calculate Barley:GLW:Cereal percentages to see which samples make the threshold [100] ###
PPNA_cereal <- PPNA_group1 #use table for samples after grouped by 'group 1' categories
PPNA_cereal <- subset(PPNA_cereal, select = -c(HEM_1143) ) #remove outlier HEM_1143
PPNA_cereal <- PPNA_cereal[c(1,3,2,5,4,6),] #organise data so barley and glume wheat first two rows
PPNA_cereal <- head(PPNA_cereal,3) #delete all but first three rows (i.e. barley, glume wheat, cereal indet)
PPNA_cereal_100<-PPNA_cereal [2:35] #select any columns containing sample data
colSums(PPNA_cereal_100)
PPNA_cereal_100<-PPNA_cereal_100[, colSums(PPNA_cereal_100)>99.99] #remove samples with <100 items
PPNA_cereal_100<-cbind.data.frame(PPNA_cereal[1], PPNA_cereal_100) #add row labels back in
PPNA_cereal_data_percentage_100 <- apply(PPNA_cereal_100[,2:6], 2, function(x){x*100/sum(x)})
row.names(PPNA_cereal_data_percentage_100)<-PPNA_cereal_100$Group.1 #calculate the % of barley vs. glume wheat
#no sample(s) with less than 80% barley to delete
### organise data for triplot (grain, chaff, weeds) ###
PPNA_triplot_100<- aggregate(HEM_PPNA[10:43], by=list(HEM_PPNA$"triplot"),  FUN = sum) #group by 'triplot' categories
PPNA_triplot_100<- subset(PPNA_triplot_100, select = c(HEM_3237,HEM_3364,HEM_3476A,HEM_3476B,HEM_720) ) #keep only samples that meet criteria
PPNA_triplot_100_trans <- as.data.frame(t(PPNA_triplot_100)) 
colnames(PPNA_triplot_100_trans) <- c("grain", "rachis", "weeds") #transpose data and add in column names
### Triplot 100 threshold ###
dev.new(width = 10.32, height=5.7, unit="in", noRStudioGD = TRUE)
crop.triplot(grain=PPNA_triplot_100_trans$grain, rachis=PPNA_triplot_100_trans$rachis, weeds=PPNA_triplot_100_trans$weeds) #make triplot 
dev.copy2pdf(file="PPNA_Triplot_100.pdf", encoding="WinAnsi")

### calculate Barley:GLW:Cereal percentages to see which samples make the threshold [30] ###
PPNA_cereal_30<-PPNA_cereal [2:35] #select any columns containing sample data
colSums(PPNA_cereal_30)
PPNA_cereal_30<-PPNA_cereal_30[, colSums(PPNA_cereal_30)>29.99] #remove samples with <30 items
PPNA_cereal_30<-cbind.data.frame(PPNA_cereal[1], PPNA_cereal_30) #add row labels back in
PPNA_cereal_data_percentage_30 <- apply(PPNA_cereal_30[,2:12], 2, function(x){x*100/sum(x)})
row.names(PPNA_cereal_data_percentage_30)<-PPNA_cereal_30$Group.1 #calculate the % of barley vs. glume wheat
#no sample(s) with less than 80% barley to delete
### organise data for triplot (grain, chaff, weeds) ###
PPNA_triplot_30<- aggregate(HEM_PPNA[10:43], by=list(HEM_PPNA$"triplot"),  FUN = sum) #group by 'triplot' categories
PPNA_triplot_30<- subset(PPNA_triplot_30, select = c(HEM_815,HEM_886,HEM_3237,HEM_3364,HEM_3476A,HEM_3476B,HEM_720,HEM_717b,HEM_721A) ) #keep only samples that meet criteria
PPNA_triplot_30_trans <- as.data.frame(t(PPNA_triplot_30)) 
colnames(PPNA_triplot_30_trans) <- c("grain", "rachis", "weeds") #transpose data and add in column names
### Triplot 30 threshold ###
dev.new(width = 10.32, height=5.7, unit="in", noRStudioGD = TRUE)
crop.triplot(grain=PPNA_triplot_30_trans$grain, rachis=PPNA_triplot_30_trans$rachis, weeds=PPNA_triplot_30_trans$weeds) #make triplot 
dev.copy2pdf(file="PPNA_Triplot_30.pdf", encoding="WinAnsi")

### Weed-based DA ###
### Create a dataset for crop processing - just species,crop pro codes and samples ###
PPNA_croppro_30<-HEM_PPNA[c(2,9,10:44)]
PPNA_croppro_30<- subset(PPNA_croppro_30, select = c(1,2,HEM_815,HEM_886,HEM_3237,HEM_3364,HEM_3476A,HEM_3476B,HEM_720,HEM_717b,HEM_721A) ) #select only samples that met above threshold for inclusion (>10 cereal items, of which 80% or more was barley)
PPNA_croppro_30<-PPNA_croppro_30[complete.cases(PPNA_croppro_30),] #remove any rows which don't have a croppro code
PPNA_croppro30<-PPNA_croppro_30[3:11][, colSums(PPNA_croppro_30[3:11])>9.9]
PPNA_croppro30<-cbind.data.frame(PPNA_croppro_30[1:2],PPNA_croppro30) #remove samples with <10 items
### transform data using crop.dataorg function ###
crop.org<-crop.dataorg(PPNA_croppro30, codes=2, samples=3)
### run LDA and plot data ###
LDAresult_crop<-LDAcrop.pro(crop.org)
pdf('HEM_PPNA_LDAresult.pdf') #export plot into PDF directly
crop.plot2D(LDAresult_crop, site = "HEM PPNA_30", col="red")
dev.off() #finished command to export plot to pdf

### LPPNB HEMMEH ###
### Triplot ###
### calculate Barley:GLW:Cereal percentages to see which samples make the threshold [30] ###
LPPNB_cereal <- LPPNB_group1 #use table for samples after grouped by 'group 1' categories
LPPNB_cereal <- LPPNB_cereal[c(1,3,2,5,4,6),] #organise data so barley and glume wheat first two rows
LPPNB_cereal <- head(LPPNB_cereal,3) #delete all but first three rows (i.e. barley, glume wheat, cereal indet)
LPPNB_cereal_30<-LPPNB_cereal [2:12] #select any columns containing sample data
colSums(LPPNB_cereal_30)
LPPNB_cereal_30<-LPPNB_cereal_30[, colSums(LPPNB_cereal_30)>29.99] #remove samples with <30 items
LPPNB_cereal_30<-cbind.data.frame(LPPNB_cereal[1], LPPNB_cereal_30) #add row labels back in
LPPNB_cereal_data_percentage_30 <- apply(LPPNB_cereal_30[,2:4], 2, function(x){x*100/sum(x)})
row.names(LPPNB_cereal_data_percentage_30)<-LPPNB_cereal_30$Group.1 #calculate the % of barley vs. glume wheat
#no sample(s) with 80% glume wheat or barley

############ WEED ECOLOGICAL ANALYSIS - DATA PREPARATION ############
### SHARARA ###
### weed eco data - 100 cereal threshold ###
SHAR_crops <- SHAR_group1
SHAR_crops <- SHAR_crops[c(1,2,3), ]
SHAR_crops100<-SHAR_crops[2:35][, colSums(SHAR_crops[2:35])>99.9] #remove samples with less than 100 cereal crop items to get list of samples with 100+ cereal items
SHAR_weedeco_100 <- SHAR_data[c("weed eco code","SHAR_003","SHAR_069")]  #make new table from HEM_PPNA with only weed eco codes and those samples with 100+ crop items
SHAR_weedeco_100 <-na.omit(SHAR_weedeco_100)#delete N/A rows
SHAR_weedeco_100_temp<-SHAR_weedeco_100[2:3][, colSums(SHAR_weedeco_100[2:3])>9.9] #remove samples with less than 10 weed eco items 
SHAR_weedeco_100_temp <-(SHAR_weedeco_100_temp>0)+0
SHAR_weedeco_100<-cbind.data.frame(SHAR_weedeco_100[1], SHAR_weedeco_100_temp)
write.csv(SHAR_weedeco_100, file = "SHAR_weedeco_100.csv")

### weed eco data - 30 cereal threshold ###
SHAR_crops30<-SHAR_crops[2:35][, colSums(SHAR_crops[2:35])>29.9] #remove samples with less than 30 cereal crop items to get list of samples with 30+ cereal items
SHAR_weedeco_30 <- SHAR_data[c("weed eco code","SHAR_003","SHAR_069","SHAR_077")]  #make new table from HEM_PPNA with only weed eco codes and those samples with 100+ crop items
SHAR_weedeco_30 <-na.omit(SHAR_weedeco_30)#delete N/A rows
SHAR_weedeco_30_temp<-SHAR_weedeco_30[2:4][, colSums(SHAR_weedeco_30[2:4])>9.9] #remove samples with less than 10 weed eco items 
SHAR_weedeco_30_temp <-(SHAR_weedeco_30_temp>0)+0
SHAR_weedeco_30<-cbind.data.frame(SHAR_weedeco_30[1], SHAR_weedeco_30_temp)
write.csv(SHAR_weedeco_30, file = "SHAR_weedeco_30.csv")

### PPNA HEMMEH ###
### weed eco data - 100 cereal threshold ###
HEM_crops <- PPNA_group1
HEM_crops <- HEM_crops[c(1,2,3), ]
HEM_crops100<-HEM_crops[2:36][, colSums(HEM_crops[2:36])>99.9] #remove samples with less than 100 crop items to get list of samples with 100+ cereal items
HEM_crops100<- subset(HEM_crops100, select = -c(HEM_1143) ) #delete sample 1143
HEM_PPNA_weedeco_100 <- HEM_PPNA[c("weed eco code","HEM_3237","HEM_3364","HEM_3476A","HEM_3476B","HEM_720")] #make new table from HEM_PPNA with only weed eco codes and those samples with 100+ crop items
HEM_PPNA_weedeco_100 <-na.omit(HEM_PPNA_weedeco_100)#delete N/A rows
HEM_PPNA_weedeco_100_temp<-HEM_PPNA_weedeco_100[2:6][, colSums(HEM_PPNA_weedeco_100[2:6])>9.9] #remove samples with less than 10 weed eco items 
HEM_PPNA_weedeco_100_temp <-(HEM_PPNA_weedeco_100_temp>0)+0
HEM_PPNA_weedeco_100<-cbind.data.frame(HEM_PPNA_weedeco_100[1], HEM_PPNA_weedeco_100_temp)
write.csv(HEM_PPNA_weedeco_100, file = "HEM_PPNA_weedeco_100.csv")

### weed eco data - 30 cereal threshold ###
HEM_crops <- PPNA_group1
HEM_crops <- HEM_crops[c(1,2,3), ]
HEM_crops30<-HEM_crops[2:36][, colSums(HEM_crops[2:36])>29.9] #remove samples with less than 100 crop items to get list of samples with 100+ cereal items
HEM_crops30<- subset(HEM_crops30, select = -c(HEM_1143) ) #delete sample 1143
HEM_PPNA_weedeco_30 <- HEM_PPNA[c("weed eco code","HEM_815","HEM_886","HEM_3237","HEM_3364","HEM_3476A","HEM_3476B","HEM_717b","HEM_720","HEM_721A")] #make new table from HEM_PPNA with only weed eco codes and those samples with 100+ crop items
HEM_PPNA_weedeco_30 <-na.omit(HEM_PPNA_weedeco_30)#delete N/A rows
HEM_PPNA_weedeco_30_temp<-HEM_PPNA_weedeco_30[2:10][, colSums(HEM_PPNA_weedeco_30[2:10])>9.9] #remove samples with less than 10 weed eco items 
HEM_PPNA_weedeco_30_temp <-(HEM_PPNA_weedeco_30_temp>0)+0
HEM_PPNA_weedeco_30<-cbind.data.frame(HEM_PPNA_weedeco_30[1], HEM_PPNA_weedeco_30_temp)
write.csv(HEM_PPNA_weedeco_30, file = "HEM_PPNA_weedeco_30.csv")

### LPPNB HEMMEH ###
### weed eco data - 30 cereal threshold ###
HEM_LPPNBcrops <- LPPNB_group1
HEM_LPPNBcrops <- HEM_LPPNBcrops[c(1,2,3), ]
HEM_LPPNBcrops30<-HEM_LPPNBcrops[2:12][, colSums(HEM_LPPNBcrops[2:12])>29.9] #remove samples with less than 30 crop items to get list of samples with 100+ cereal items
HEM_LPPNB_weedeco <- HEM_LPPNB[c("weed eco code","HEM_378","HEM_555","HEM_353")] #make new table from HEM_PPNA with only weed eco codes and those samples with 100+ crop items
HEM_LPPNB_weedeco <-na.omit(HEM_LPPNB_weedeco)#delete N/A rows
HEM_LPPNB_weedeco_temp<-HEM_LPPNB_weedeco[2:4][, colSums(HEM_LPPNB_weedeco[2:4])>9.9] #remove samples with less than 10 weed eco items 
HEM_LPPNB_weedeco_temp <-(HEM_LPPNB_weedeco_temp>0)+0
HEM_LPPNB_weedeco<-cbind.data.frame(HEM_LPPNB_weedeco[1], HEM_LPPNB_weedeco_temp)
write.csv(HEM_LPPNB_weedeco, file = "HEM_LPPNB_30_weedeco.csv")