5 Metagenomic data
5.1 Library complexity
Nonpareil estimate of the metagenomic complexity after removing host DNA.
all_data %>%
select(dataset,Extraction,nonpareil,Taxon) %>%
unique() %>%
group_by(Taxon,Extraction) %>%
summarise(value = sprintf("%.1f±%.1f", mean(nonpareil), sd(nonpareil))) %>%
pivot_wider(names_from = Extraction, values_from = value) %>%
tt(caption = "Mean and standard deviation of breadth of host genome coverage")| Taxon | REF | DREX1 | DREX2 |
|---|---|---|---|
| Amphibian | 0.9±0.1 | 0.8±0.1 | 0.8±0.1 |
| Reptile | 0.9±0.1 | 0.9±0.0 | 0.9±0.1 |
| Mammal | 0.8±0.2 | 0.8±0.1 | 0.7±0.3 |
| Bird | 0.9±0.1 | 1.0±0.0 | 0.8±0.4 |
| Control | 0.0±0.0 | 0.5±0.6 | 0.5±0.7 |
all_data %>%
select(dataset,Extraction,nonpareil,Taxon,Species) %>%
unique() %>%
ggplot(aes(x=Extraction,y=nonpareil, color=Species, group=Extraction)) +
geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") +
geom_jitter() +
scale_color_manual(values=vertebrate_colors) +
facet_grid(. ~ Taxon, scales = "free") +
theme_minimal() +
labs(y="Nonpareil completeness",x="Extraction method")all_data %>%
select(dataset,Extraction,Sample,Species,nonpareil,Taxon,catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
#filter(catalogue != "All") %>%
MASS::glmmPQL(nonpareil~ Taxon+Extraction,random=~1|Species/Sample,
family="quasibinomial",data=.) %>%
car::Anova(.,test.statistic="Wald")Analysis of Deviance Table (Type II tests)
Response: zz
Chisq Df Pr(>Chisq)
Taxon 9.1822 3 0.0269646 *
Extraction 14.2287 2 0.0008133 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
data<-all_data %>%
select(dataset,Extraction,Sample,Species,nonpareil,Taxon,catalogue) %>%
unique() %>%
filter(Taxon != "Control") #%>% filter(catalogue != "All")
M<-MASS::glmmPQL(nonpareil~ Taxon+Extraction,random=~1|Species/Sample,
family="quasibinomial",data=data)
Anova_table<-car::Anova(M,test.statistic="Wald",type="III")%>%
mutate(F=Chisq/Df) # Approximate F value
n <- nrow(M$data) # Total number of observations
df_fixed <- length(fixef(M)) # Number of fixed effect parameters
df_random <- length(ranef(M)) # Number of random effect parameters (or levels)
approx_residual_df <- n - df_fixed - df_random # Approx. residual Df
data.frame(Anova_table,
p_value=pf(Anova_table$F, Anova_table$Df, approx_residual_df, lower.tail = FALSE))%>%
select(F,p_value) F p_value
(Intercept) 34.129537 1.883942e-07
Taxon 3.060717 3.438361e-02
Extraction 7.114372 1.621943e-03
| effect | term | estimate | std.error | df | statistic | p.value |
|---|---|---|---|---|---|---|
| fixed | (Intercept) | 2.0341583 | 0.3636753 | 46 | 5.5933361 | 1.173059e-06 |
| fixed | TaxonReptile | 0.5969719 | 0.4633445 | 8 | 1.2883977 | 2.336181e-01 |
| fixed | TaxonMammal | -0.5520372 | 0.4122472 | 8 | -1.3390926 | 2.173392e-01 |
| fixed | TaxonBird | 0.4335768 | 0.4524258 | 8 | 0.9583381 | 3.659548e-01 |
| fixed | ExtractionDREX1 | 0.0578808 | 0.3338911 | 46 | 0.1733523 | 8.631351e-01 |
| fixed | ExtractionDREX2 | -0.8572831 | 0.2948977 | 46 | -2.9070523 | 5.597154e-03 |
Variance StdDev
Species = pdLogChol(1)
(Intercept) 2.270973e-09 4.765473e-05
Sample = pdLogChol(1)
(Intercept) 1.978035e-01 4.447510e-01
Residual 1.172704e-01 3.424477e-01
[1] 5.046029e-08
[1] 4
EHEX obtained lower nonpareil completeness values than REF and DREX. Lowest nonpareil completeness values were ontained for mammals and higest for reptiles.
5.2 Combined community analysis
species="combined"
genus=species
sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
rename(dataset=Dataset)
read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0134/DMB0134_counts.tsv.gz") %>%
rename(genome = 1)
genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0134/DMB0134_mag_info.tsv.gz") %>%
rename(genome = 1, length=mag_size)
genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0134/DMB0134_coverage.tsv.gz") %>%
rename(genome = 1)
download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0134/DMB0134.tree.gz", "data/DMB0134.tree.gz")
genome_tree <- read_tree("data/DMB0134.tree.gz")5.2.1 Filter data
#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
mutate(across(-1, ~ . * read_counts[[cur_column()]]))
# Transform to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))5.2.2 Alpha diversity
#Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=0) %>%
t() %>%
as.data.frame() %>%
rename(richness=1) %>%
rownames_to_column(var="dataset")
neutral <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1) %>%
t() %>%
as.data.frame() %>%
rename(neutral=1) %>%
rownames_to_column(var="dataset")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hilldiv(.,q=1,tree=genome_tree) %>%
t() %>%
as.data.frame() %>%
rename(phylogenetic=1) %>%
rownames_to_column(var="dataset")
# Merge alpha diversities
alpha_diversity <- richness %>%
full_join(neutral,by=join_by(dataset==dataset)) %>%
full_join(phylogenetic,by=join_by(dataset==dataset))
# Write alpha diversities
#alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))
# Print alpha diversity
alpha_diversity %>%
left_join(sample_metadata %>%
select(dataset, Extraction, Sample), by = join_by(dataset == dataset)) %>%
group_by(Extraction) %>%
summarise(richness = mean(richness), neutral = mean(neutral), phylogenetic = mean(phylogenetic)) %>%
tt()| Extraction | richness | neutral | phylogenetic |
|---|---|---|---|
| DREX1 | 53.69565 | 24.45051 | 4.093443 |
| DREX2 | 53.45455 | 25.65059 | 4.011921 |
| ZYMO | 78.76190 | 35.06459 | 4.747699 |
alpha_data %>%
dplyr::select(dataset,Library,Species,Taxon,Sample,Extraction, richness,neutral,phylogenetic) %>%
pivot_longer(!c(dataset,Library,Species,Taxon,Sample,Extraction), names_to = "metric", values_to = "value") %>%
mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic"))) %>%
mutate(Taxon=factor(Taxon,levels=c("Amphibian","Reptile","Mammal"))) %>%
unique() %>%
ggplot(aes(x=Extraction,y=value, color=Species, group=Extraction)) +
geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") +
geom_jitter() +
scale_color_manual(values=vertebrate_colors) +
facet_grid(metric ~ Taxon, scales = "free") +
theme_minimal() +
labs(y="Diversity",x="Extraction method")5.2.3 Richness
alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, richness, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(log(richness) ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
plot()alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, richness, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(log(richness) ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
anova()Type III Analysis of Variance Table with Satterthwaite's method
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
Taxon 11.1732 3.7244 3 8.416 5.8091 0.01921 *
Extraction 0.7084 0.3542 2 39.374 0.5525 0.57992
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, richness, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(log(richness) ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
r.squaredGLMM() R2m R2c
[1,] 0.3901002 0.7584841
## 2% of the variance explained by fixed effects
## 91% of the variance explained by random effects
alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, richness, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(log(richness) ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
VarCorr() %>%
print(comp = "Variance") Groups Name Variance
Sample:Species (Intercept) 0.52240
Species (Intercept) 0.45551
Residual 0.64113
[1] 58.48204
[1] 32.51796
REF got slightly lower richness values than DREX and EHEX. Large variability between species within taxa.
5.2.4 Neutral
alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, neutral, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(neutral ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
plot()alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, neutral, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(neutral ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
anova()Type III Analysis of Variance Table with Satterthwaite's method
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
Taxon 909.42 303.14 3 8.324 2.3875 0.1418
Extraction 790.15 395.08 2 39.139 3.1116 0.0557 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, neutral, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(neutral ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
r.squaredGLMM() R2m R2c
[1,] 0.2662611 0.7706441
## 2% of the variance explained by fixed effects
## 91% of the variance explained by random effects
alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, neutral, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(neutral ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
VarCorr() %>%
print(comp = "Variance") Groups Name Variance
Sample:Species (Intercept) 120.83
Species (Intercept) 158.39
Residual 126.97
[1] 69.35093
[1] 19.64907
No significant differences in neutral diversity between extraction methods and taxonomic groups. Neutral diversity very species specific.
5.2.5 Phylogenetic
alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, phylogenetic, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(phylogenetic ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
plot()alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, phylogenetic, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(phylogenetic ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
anova()Type III Analysis of Variance Table with Satterthwaite's method
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
Taxon 11.1581 3.7194 3 8.421 3.6047 0.06232 .
Extraction 1.7646 0.8823 2 39.188 0.8551 0.43302
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, phylogenetic, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(phylogenetic ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
r.squaredGLMM() R2m R2c
[1,] 0.2661818 0.7436036
## 6% of the variance explained by fixed effects
## 90% of the variance explained by random effects
alpha_data %>%
dplyr::select(dataset, Extraction, Sample, Species, phylogenetic, Taxon, catalogue) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(phylogenetic ~ Taxon + Extraction + (1 | Species / Sample), data = ., REML = TRUE) %>%
VarCorr() %>%
print(comp = "Variance") Groups Name Variance
Sample:Species (Intercept) 1.74086
Species (Intercept) 0.18041
Residual 1.03180
[1] 42.82092
[1] 47.17908
No significant differences in phylogenetic diversity between extraction methods and taxonomic groups.
5.3 Microbial complexity recovery
all_data %>%
select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon,catalogue) %>%
mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction))) %>%
mutate(damr=ifelse(is.na(damr),0,damr)) %>%
unique() %>% # what do we filter for?
group_by(Taxon,Extraction) %>%
summarise(value = sprintf("%.3f±%.3f", mean(damr), sd(damr))) %>%
pivot_wider(names_from = Extraction, values_from = value) %>%
tt(caption = "Mean and standard deviation of breadth of host genome coverage")| Taxon | REF | DREX1 | DREX2 |
|---|---|---|---|
| Amphibian | 0.755±0.346 | 0.735±0.337 | 0.755±0.335 |
| Reptile | 0.947±0.070 | 0.970±0.054 | 0.977±0.044 |
| Mammal | 0.768±0.144 | 0.889±0.165 | 0.831±0.154 |
| Bird | 0.856±0.197 | 0.793±0.261 | 0.724±0.319 |
| Control | 1.000±0.000 | 1.000±0.000 | 1.000±0.000 |
all_data %>%
select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon,Sample,Species) %>%
mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction))) %>%
mutate(damr=ifelse(is.na(damr),0,damr)) %>%
unique() %>%
ggplot(aes(x=Extraction,y=damr, color=Species, group=Extraction)) +
geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") +
geom_jitter() +
scale_color_manual(values=vertebrate_colors) +
facet_grid(. ~ Taxon, scales = "free") +
theme_minimal() +
labs(y="Domain-adjusted mapping rate",x="Extraction method")all_data %>%
select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon, Sample, Species,catalogue) %>%
mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction))) %>%
mutate(damr=ifelse(is.na(damr),0,damr)) %>%
unique() %>%
filter(Taxon != "Control") %>%
MASS::glmmPQL(damr~ Taxon+Extraction,random=~1|Species/Sample,
family="quasibinomial",data=.) %>%
car::Anova(.,test.statistic="Wald")Analysis of Deviance Table (Type II tests)
Response: zz
Chisq Df Pr(>Chisq)
Taxon 7.0478 3 0.07039 .
Extraction 0.7968 2 0.67138
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
data<-all_data %>%
select(dataset,Extraction,microbial_fraction,MAG_mapping_percentage,Taxon, Sample, Species,catalogue) %>%
mutate(damr=pmin(1,MAG_mapping_percentage/(microbial_fraction))) %>%
mutate(damr=ifelse(is.na(damr),0,damr)) %>%
unique() %>%
filter(Taxon != "Control")
M<-MASS::glmmPQL(damr~ Taxon+Extraction,random=~1|Species/Sample,
family="quasibinomial",data=data)
Anova_table<-car::Anova(M,test.statistic="Wald",type="III")%>%
mutate(F=Chisq/Df) # Approximate F value
n <- nrow(M$data) # Total number of observations
df_fixed <- length(fixef(M)) # Number of fixed effect parameters
df_random <- length(ranef(M)) # Number of random effect parameters (or levels)
approx_residual_df <- n - df_fixed - df_random # Approx. residual Df
data.frame(Anova_table,
p_value=pf(Anova_table$F, Anova_table$Df, approx_residual_df, lower.tail = FALSE))%>%
select(F,p_value) F p_value
(Intercept) 5.0023896 0.02880432
Taxon 2.3492555 0.08075295
Extraction 0.3984138 0.67303769
| effect | term | estimate | std.error | df | statistic | p.value |
|---|---|---|---|---|---|---|
| fixed | (Intercept) | 1.33363650 | 0.6227920 | 46 | 2.1413836 | 0.03757338 |
| fixed | TaxonReptile | 2.30012902 | 0.9535923 | 8 | 2.4120676 | 0.04237128 |
| fixed | TaxonMammal | 0.40556099 | 0.8473819 | 8 | 0.4786048 | 0.64502663 |
| fixed | TaxonBird | 0.40804767 | 0.8579913 | 8 | 0.4755849 | 0.64708519 |
| fixed | ExtractionDREX1 | 0.15284789 | 0.2883826 | 46 | 0.5300177 | 0.59864887 |
| fixed | ExtractionDREX2 | -0.08950725 | 0.2795925 | 46 | -0.3201346 | 0.75031595 |
## 27% of the variance explained by fixed effects
## 27% of the variance explained by random effects
VarCorr(M) Variance StdDev
Species = pdLogChol(1)
(Intercept) 1.073914e-07 0.0003277063
Sample = pdLogChol(1)
(Intercept) 1.679619e+00 1.2960011568
Residual 9.176235e-02 0.3029230022
[1] 3.595301e-07
[1] 27
All three extraction methods yield similar DAMRs.
5.3.1 Community barplot
# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Stacked barplot
genome_counts %>%
mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE))) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
left_join(., sample_metadata, by = join_by(dataset == dataset)) %>%
mutate(Taxon=factor(Taxon,levels=c("Amphibian",
"Reptile",
"Mammal",
"Bird",
"Control"))) %>%
mutate(Species=factor(Species,levels=c("Calotriton asper",
"Lissotriton helveticus",
"Salamandra atra",
"Chalcides striatus",
"Natrix astreptophora",
"Podarcis muralis",
"Plecotus auritus",
"Sciurus carolinensis",
"Trichosurus vulpecula",
"Geospizopsis unicolor",
"Perisoreus infaustus",
"Zonotrichia capensis",
"Extraction control",
"Library control"))) %>%
filter(Taxon != "Control") %>%
ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance") +
facet_nested(. ~ Taxon + Species + Sample + Extraction, scales="free_x") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.title=element_blank(),
panel.spacing = unit(0, "lines"))genome_counts_filt_variance <- genome_counts_filt %>%
column_to_rownames(var="genome") %>%
select(all_of(sample_metadata %>%
filter(!Taxon %in% c("Birds", "Control")) %>% # exclude birds
pull(dataset))) %>%
select(where(~!all(. == 0)))# Perform betadisper analysis
richness_betadisper_result <- genome_counts_filt_variance %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>%
betadisper(., sample_metadata %>%
filter(dataset %in% colnames(genome_counts_filt_variance)) %>%
arrange(match(dataset,colnames(genome_counts_filt_variance))) %>% pull(Extraction))
# Check if there are significant differences in dispersion
permutest(richness_betadisper_result,pairwise = TRUE) # Tests homogeneity of dispersion and pairwise comparisons between groups using permutations.
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.009035 0.0045175 1.1444 999 0.33
Residuals 60 0.236859 0.0039477
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
DREX1 DREX2 ZYMO
DREX1 0.57300 0.203
DREX2 0.55115 0.341
ZYMO 0.20658 0.35323
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = distances ~ group, data = df)
$group
diff lwr upr p adj
DREX2-DREX1 -0.007193598 -0.05379165 0.03940445 0.9270330
ZYMO-DREX1 -0.028224995 -0.07482304 0.01837305 0.3194398
ZYMO-DREX2 -0.021031398 -0.06762945 0.02556665 0.5272157
richness <- genome_counts_filt_variance %>%
hillpair(.,q=0, metric="C") %>%
as.dist() %>% # nrow(richness)
adonis2(formula = . ~ Extraction+Taxon+Species+Sample,
data = sample_metadata %>%
filter(dataset %in% colnames(genome_counts_filt_variance)) %>%
arrange(match(dataset,colnames(genome_counts_filt_variance))),
permutations = 999,
by="terms") %>%
broom::tidy()
richness# A tibble: 6 × 6
term df SumOfSqs R2 statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Extraction 2 0.575 0.0208 3.62 0.001
2 Taxon 3 6.62 0.239 27.7 0.001
3 Species 8 13.2 0.475 20.7 0.001
4 Sample 11 4.31 0.156 4.93 0.001
5 Residual 38 3.02 0.109 NA NA
6 Total 62 27.7 1 NA NA
# Perform betadisper analysis
neutral_betadisper_result <- genome_counts_filt_variance %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
betadisper(., sample_metadata %>%
filter(dataset %in% colnames(genome_counts_filt_variance)) %>%
arrange(match(dataset,colnames(genome_counts_filt_variance))) %>% pull(Extraction))
# Check if there are significant differences in dispersion
permutest(neutral_betadisper_result,pairwise=TRUE) # Tests homogeneity of dispersion and pairwise comparisons between groups using permutations.
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.009171 0.0045854 1.3738 999 0.26
Residuals 60 0.200261 0.0033377
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
DREX1 DREX2 ZYMO
DREX1 0.42700 0.171
DREX2 0.42982 0.343
ZYMO 0.16936 0.32804
neutral <- genome_counts_filt_variance %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Extraction+Taxon+Species+Sample,
data = sample_metadata %>%
filter(dataset %in% colnames(genome_counts_filt_variance)) %>%
arrange(match(dataset,colnames(genome_counts_filt_variance))),
permutations = 999,
by="terms") %>%
broom::tidy()
neutral# A tibble: 6 × 6
term df SumOfSqs R2 statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Extraction 2 0.447 0.0160 5.29 0.001
2 Taxon 3 6.83 0.244 53.9 0.001
3 Species 8 13.4 0.479 39.7 0.001
4 Sample 11 5.68 0.203 12.2 0.001
5 Residual 38 1.61 0.0574 NA NA
6 Total 62 28.0 1 NA NA
# Perform betadisper analysis
phylogenetic_betadisper_result <- genome_counts_filt_variance %>%
hillpair(.,q=1, metric="C") %>%
as.dist() %>%
betadisper(., sample_metadata %>%
filter(dataset %in% colnames(genome_counts_filt_variance)) %>%
arrange(match(dataset,colnames(genome_counts_filt_variance))) %>% pull(Extraction))
# Check if there are significant differences in dispersion
permutest(phylogenetic_betadisper_result,pairwise = TRUE) # Tests homogeneity of dispersion and pairwise comparisons between groups using permutations.
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.009171 0.0045854 1.3738 999 0.287
Residuals 60 0.200261 0.0033377
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
DREX1 DREX2 ZYMO
DREX1 0.46000 0.183
DREX2 0.42982 0.322
ZYMO 0.16936 0.32804
phylogenetic <- genome_counts_filt_variance %>%
hillpair(.,q=1, tree=genome_tree, metric="C") %>%
as.dist() %>%
adonis2(formula = . ~ Extraction+Taxon+Species+Sample,
data = sample_metadata %>%
filter(dataset %in% colnames(genome_counts_filt_variance)) %>%
arrange(match(dataset,colnames(genome_counts_filt_variance))),
permutations = 999,
by="terms") %>%
broom::tidy()
phylogenetic# A tibble: 6 × 6
term df SumOfSqs R2 statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Extraction 2 0.259 0.0259 4.96 0.001
2 Taxon 3 3.63 0.364 46.4 0.001
3 Species 8 3.67 0.368 17.6 0.001
4 Sample 11 1.41 0.142 4.93 0.001
5 Residual 38 0.991 0.0994 NA NA
6 Total 62 9.97 1 NA NA
data.frame(
Term = factor(rep(c("Extraction", "Taxon", "Species","Sample","Residual"), times = 3),levels = c("Extraction", "Taxon", "Species","Sample","Residual")),
Metric = factor(rep(c("Richness", "Neutral", "Phylogenetic"), each = 5),levels = c("Richness", "Neutral", "Phylogenetic")),
Value = c(richness$R2[-6],neutral$R2[-6],phylogenetic$R2[-6])
)%>%
ggplot(., aes(x = Metric, y = Value, fill = Term)) +
geom_bar(stat = "identity", position = "stack") +
#scale_fill_brewer(palette = "Set1") + # Choose a color palette
scale_fill_manual(values = c("#47bfb6", "#4a7015", "#5e1717", "#9e6b24", "#4a4a4a")) +
labs(
x = "Beta diversity metric",
y = "R-square",
fill = "Term"
) +
theme_minimal()genome_counts_NMDS <- genome_counts_filt_variance %>%
#column_to_rownames(var="genome") %>%
select(where(~!all(. == 0))) %>%
hillpair(.,q=1, metric="C", out="dist") %>%
metaMDS(.,trymax = 999, k=2, trace=0) %>%
vegan::scores() %>%
as_tibble(., rownames = "dataset") %>%
left_join(sample_metadata, by = join_by(dataset == dataset)) %>%
filter(Taxon != "Control") %>%
group_by(Sample) %>%
mutate(sample_x=mean(NMDS1), sample_y=mean(NMDS2))
genome_counts_NMDS %>%
ggplot(., aes(x=NMDS1,y=NMDS2, color=Species, shape=Extraction)) +
scale_color_manual(values=vertebrate_colors) +
geom_point(size=3, alpha=0.8) +
geom_segment(aes(x=sample_x, y=sample_y, xend=NMDS1, yend=NMDS2), alpha=0.2) +
theme_classic() +
theme(legend.position="right", legend.box="vertical") +
guides(color=guide_legend(title="Species"))