multi_variable <- left_join(read_tsv("data/extraction.tsv"), read_tsv("data/sample.tsv"), by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
left_join(read_tsv("data/library.tsv"), by="sample_id") %>%
left_join(read_tsv("data/preprocessing.tsv"), by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(assembly_type == "Individual") %>%
dplyr::select(sample_id, tax_group,specimen_species, sample_type, extraction_total, library_PCR_cycles_required, singlem_fraction, host_duplicate_fraction, assembly_length, assembly_num_bins)
multi <- multi_variable %>%
dplyr::select(-c(tax_group,specimen_species,sample_type)) %>%
group_by(sample_id) %>%
slice_tail(n = 1) %>%
column_to_rownames(var="sample_id")
multi_dis <- multi_variable %>%
dplyr::select(-c(tax_group,specimen_species,sample_type)) %>%
group_by(sample_id) %>%
slice_tail(n = 1) %>%
column_to_rownames(var="sample_id") %>%
daisy(., metric = c("gower"))
multi_pcoa <- cmdscale(multi_dis)
species_loadings<-scores(envfit(ord=data.frame(scores(multi_pcoa)),env=data.frame(multi),na.rm = TRUE),"vectors")%>%
as.data.frame()%>%
rownames_to_column(.,var="variables")
species_loadings_rescaled<-species_loadings %>%
mutate(Dim1_normalized=decostand(Dim1,method="range"),
Dim2_normalized=decostand(Dim2,method="range"))%>%
mutate(Dim1_rescaled=Dim1_normalized * (max(scores(multi_pcoa)[,1]) - min(scores(multi_pcoa)[,1])) + min(scores(multi_pcoa)[,1]),
Dim2_rescaled=Dim2_normalized * (max(scores(multi_pcoa)[,2]) - min(scores(multi_pcoa)[,2])) + min(scores(multi_pcoa)[,2]))
Sample types
multi_pcoa %>%
vegan::scores() %>%
as_tibble(., rownames = "sample_id") %>%
dplyr::left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
group_by(sample_type) %>%
mutate(x_cen = median(Dim1, na.rm = TRUE)) %>%
mutate(y_cen = median(Dim2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot() +
geom_point(aes(x = Dim1, y = Dim2, color = sample_type),alpha=0.5, size=0.5) +
geom_segment(aes(x = x_cen, y = y_cen, xend = Dim1, yend = Dim2, color = sample_type), alpha = 0.2) +
scale_color_manual(values = c("#bdca50", "#AA3377")) +
geom_segment(data = species_loadings_rescaled,
aes(x = 0, xend = Dim1_rescaled, y = 0, yend = Dim2_rescaled),
arrow = arrow(length = unit(0.25, "cm")),size=0.5) +
geom_label_repel(data = species_loadings_rescaled, aes(x = Dim1_rescaled, y = Dim2_rescaled, label = variables),
size = 3)+
xlab("PCOA1")+
ylab("PCOA2")+
theme_minimal()

ggsave("figures/overall_pcoa_type.pdf",width=9, height=6, units="in")
Taxonomic groups
multi_pcoa %>%
vegan::scores() %>%
as_tibble(., rownames = "sample_id") %>%
dplyr::left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
group_by(tax_group) %>%
mutate(x_cen = median(Dim1, na.rm = TRUE)) %>%
mutate(y_cen = median(Dim2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot() +
geom_point(aes(x = Dim1, y = Dim2, color = tax_group),alpha=0.5, size=0.5) +
geom_segment(aes(x = x_cen, y = y_cen, xend = Dim1, yend = Dim2, color = tax_group), alpha = 0.2) +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
geom_segment(data = species_loadings_rescaled,
aes(x = 0, xend = Dim1_rescaled, y = 0, yend = Dim2_rescaled),
arrow = arrow(length = unit(0.25, "cm")),size=0.5) +
geom_label_repel(data = species_loadings_rescaled, aes(x = Dim1_rescaled, y = Dim2_rescaled, label = variables),
size = 3)+
xlab("PCOA1")+
ylab("PCOA2")+
theme_minimal()

ggsave("figures/overall_pcoa_taxa.pdf",width=9, height=6, units="in")