library(tidyverse)
library(ggplot2)
library(ggforce)

# Load the PCA output: per-sample component scores (.eigenvec) and the
# eigenvalue of each component (.eigenval).
# NOTE(review): col_names = TRUE consumes the first line of the file as a
# header -- confirm the .eigenvec file really has a header row, otherwise the
# first sample is silently lost.
pca <- read_table(
  "./PCA_with_all_samples_LD_pruned.eigenvec",
  col_names = TRUE
)
eigenval <- scan("./PCA_with_all_samples_LD_pruned.eigenval")

# Drop the first column, then name what remains: the sample identifier
# followed by PC1..PCk. seq_len() keeps the naming well-defined even when no
# component columns are present, where 1:(n - 1) would count backwards.
# NOTE(review): presumably the dropped column is a family/group ID and the
# next column is the sample ID -- verify against the file layout.
pca <- pca[, -1]
names(pca) <- c("ind", paste0("PC", seq_len(ncol(pca) - 1)))

# Assign a species label to every sample based on its identifier.
#
# Each entry maps a pattern searched for in the sample name (the element name)
# to the species label it implies (the element value). Entries are applied in
# the order listed, so a later entry overwrites an earlier one for any sample
# whose name matches both. Samples matching no pattern keep NA.
species_patterns <- c(
  "aequatoriensis" = "Ipomoea aequatoriensis",
  "Ipomoea_batatas" = "Ipomoea batatas",
  "apiculata" = "Ipomoea batatas var. apiculata",
  "trifida" = "Ipomoea trifida",
  "tabascana" = "Ipomoea tabascana"
)

spp <- rep(NA, nrow(pca))
for (pattern in names(species_patterns)) {
  spp[grep(pattern, pca$ind)] <- species_patterns[[pattern]]
}

################################
### PLOTTING THE DATA ###

# Percentage of variance explained by each principal component. The PC index
# is derived from the number of eigenvalues actually read instead of being
# fixed at 10, so the table is built correctly for any number of components.
pve <- data.frame(
  PC = seq_along(eigenval),
  pve = eigenval / sum(eigenval) * 100
)

# Colours for the species groups. scale_colour_manual() hands unnamed values
# out in the order of the sorted group labels; entries beyond the number of
# groups present are simply unused.
species_colours <- c("blue", "light blue", "green", "orange", "black", "red")

# Build a scatter plot of one principal component against another.
#
# scores:     data frame with one column per component, named "PC1", "PC2", ...
# spp:        per-sample group label, used for point colour and the ellipses
# pve:        data frame whose `pve` column holds the percentage variance
#             explained by each component
# x_pc, y_pc: indices of the components shown on the x and y axes
#
# Returns the ggplot object; the caller is responsible for printing it.
plot_pc_pair <- function(scores, spp, pve, x_pc, y_pc) {
  x_name <- paste0("PC", x_pc)
  y_name <- paste0("PC", y_pc)

  # Axis title of the form "PC1 (12.3%)".
  axis_label <- function(i) {
    paste0("PC", i, " (", signif(pve$pve[i], 3), "%)")
  }

  ggplot(scores, aes(.data[[x_name]], .data[[y_name]], col = spp)) +
    geom_point(size = 3) +
    guides(x = guide_axis(angle = 50)) +
    scale_colour_manual(values = species_colours) +
    coord_equal() +
    theme_light() +
    stat_ellipse(type = "norm") +
    xlab(axis_label(x_pc)) +
    ylab(axis_label(y_pc)) +
    theme(
      axis.text = element_text(size = 16),
      axis.title = element_text(size = 20, face = "bold")
    )
}

pdf("PCA_with_all_samples_LD_pruned.pdf", width = 16, height = 8,
    onefile = TRUE)

# Plots are print()ed explicitly: top-level auto-printing does not happen when
# the script is run through source(), which would leave the PDF without pages.

# Scree plot: percentage variance explained per component
scree <- ggplot(pve, aes(PC, pve)) +
  geom_bar(stat = "identity") +
  ylab("Percentage variance explained") +
  theme_light()
print(scree)

# Cumulative percentage of variance explained (written to the console)
print(cumsum(pve$pve))

# PCA scatter plots, one page each: PC1 vs PC2, PC1 vs PC3, PC2 vs PC3
print(plot_pc_pair(pca, spp, pve, 1, 2))
print(plot_pc_pair(pca, spp, pve, 1, 3))
print(plot_pc_pair(pca, spp, pve, 2, 3))

dev.off()
