4 Laboratory sample processing
4.1 DNA extraction
4.1.1 General statistics
read_tsv("data/extraction.tsv") %>%
summarise(
max= max(extraction_total, na.rm = TRUE),
min= min(extraction_total, na.rm = TRUE),
mean= mean(extraction_total, na.rm = TRUE),
sd = sd(extraction_total, na.rm = TRUE),
median=median(extraction_total, na.rm = TRUE),
IQR=IQR(extraction_total, na.rm = TRUE)
) %>%
tt()
max | min | mean | sd | median | IQR |
---|---|---|---|---|---|
7110 | 0 | 365.683 | 677.4012 | 100.35 | 425.7 |
4.1.2 Sample types
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
ggplot(aes(x=sample_type, y= extraction_total, fill=sample_type, color=sample_type)) +
ylim(0, 2000) +
geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
scale_color_manual(values = c("#bdca50", "#AA3377")) +
scale_fill_manual(values = c("#bdca5050", "#AA337750")) +
labs(y="Density",x="DNA yield", fill="Sample type", color="Sample type") +
theme_classic()
ggsave("figures/extraction_type.pdf",width=6, height=4, units="in")
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(sample_type) %>%
summarise(
mean= mean(extraction_total, na.rm = TRUE),
sd = sd(extraction_total, na.rm = TRUE),
median=median(extraction_total, na.rm = TRUE),
IQR=IQR(extraction_total, na.rm = TRUE)) %>%
tt()
sample_type | mean | sd | median | IQR |
---|---|---|---|---|
Anal/cloacal swab | 170.2285 | 362.7389 | 25.335 | 155.7743 |
Faecal | 316.6666 | 423.6434 | 129.375 | 430.8075 |
4.1.3 Taxonomy
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=extraction_total,x=tax_group,color=tax_group,fill=tax_group)) +
ylim(0, 2000) +
geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
scale_color_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
labs(y="DNA yield",x="Taxonomic group", color="Sample type") +
theme_classic()
ggsave("figures/extraction_taxa.pdf",width=9, height=4, units="in")
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(tax_group) %>%
summarise(
mean= mean(extraction_total, na.rm = TRUE),
sd = sd(extraction_total, na.rm = TRUE),
median=median(extraction_total, na.rm = TRUE),
IQR=IQR(extraction_total, na.rm = TRUE)) %>%
tt()
tax_group | mean | sd | median | IQR |
---|---|---|---|---|
Amphibians | 233.35078 | 358.90219 | 128.250 | 137.2500 |
Bats | 59.60574 | 92.00357 | 30.735 | 63.4950 |
Birds | 74.43550 | 262.34517 | 7.380 | 27.3555 |
Mammals | 463.93202 | 471.41487 | 321.975 | 563.6250 |
Reptiles | 394.50619 | 437.86229 | 249.750 | 541.3500 |
4.1.3.1 Comparison
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
dplyr::select(extraction_total,sample_type,tax_group) %>%
mutate(sample_type = factor(sample_type),tax_group=factor(tax_group)) %>%
lm(rank(extraction_total) ~ tax_group+sample_type,data=.) %>%
Anova(.,type="III")%>%
tidy()%>%
tt()
term | sumsq | df | statistic | p.value |
---|---|---|---|---|
(Intercept) | 182499974 | 1 | 556.65758 | 2.404646e-111 |
tax_group | 413589974 | 4 | 315.38086 | 4.198853e-220 |
sample_type | 8154841 | 1 | 24.87373 | 6.538535e-07 |
Residuals | 824213930 | 2514 | NA | NA |
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
dplyr::select(extraction_total,sample_type,tax_group) %>%
mutate(sample_type = factor(sample_type),tax_group=factor(tax_group)) %>%
lm(rank(extraction_total) ~ tax_group+sample_type,data=.) %>%
glht(.,linfct = mcp(tax_group = "Tukey"))%>%
summary()%>%
tidy()%>%
tt()
term | contrast | null.value | estimate | std.error | statistic | adj.p.value |
---|---|---|---|---|---|---|
tax_group | Bats - Amphibians | 0 | -559.4931 | 52.02136 | -10.755065 | 0.000000e+00 |
tax_group | Birds - Amphibians | 0 | -725.2098 | 53.12799 | -13.650239 | 0.000000e+00 |
tax_group | Mammals - Amphibians | 0 | 311.3585 | 48.13741 | 6.468118 | 1.462295e-09 |
tax_group | Reptiles - Amphibians | 0 | 107.1624 | 49.38816 | 2.169799 | 1.848433e-01 |
tax_group | Birds - Bats | 0 | -165.7167 | 41.15805 | -4.026349 | 5.689658e-04 |
tax_group | Mammals - Bats | 0 | 870.8516 | 34.96158 | 24.908815 | 0.000000e+00 |
tax_group | Reptiles - Bats | 0 | 666.6555 | 36.79145 | 18.119849 | 0.000000e+00 |
tax_group | Mammals - Birds | 0 | 1036.5683 | 34.63845 | 29.925364 | 0.000000e+00 |
tax_group | Reptiles - Birds | 0 | 832.3722 | 38.06934 | 21.864631 | 0.000000e+00 |
tax_group | Reptiles - Mammals | 0 | -204.1961 | 30.99605 | -6.587811 | 6.917859e-10 |
4.2 Sequencing library preparation
4.2.1 Overall
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(library_PCR_cycles_required > 0) %>%
summarise(
max= max(library_input_dna, na.rm = TRUE),
min= min(library_input_dna, na.rm = TRUE),
mean= mean(library_input_dna, na.rm = TRUE),
sd = sd(library_input_dna, na.rm = TRUE),
median=median(library_input_dna, na.rm = TRUE),
IQR=IQR(library_input_dna, na.rm = TRUE)) %>%
tt()
max | min | mean | sd | median | IQR |
---|---|---|---|---|---|
552 | 0 | 89.34565 | 84.03881 | 57.6 | 180.006 |
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(library_PCR_cycles_required > 0) %>%
summarise(
max= max(library_PCR_cycles_required, na.rm = TRUE),
min= min(library_PCR_cycles_required, na.rm = TRUE),
mean= mean(library_PCR_cycles_required, na.rm = TRUE),
sd = sd(library_PCR_cycles_required, na.rm = TRUE),
median=median(library_PCR_cycles_required, na.rm = TRUE),
IQR=IQR(library_PCR_cycles_required, na.rm = TRUE)) %>%
tt()
max | min | mean | sd | median | IQR |
---|---|---|---|---|---|
33 | 2 | 9.150046 | 4.390537 | 8 | 6 |
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(library_PCR_cycles_required > 0) %>%
filter(library_input_dna > 0) %>%
mutate(library_PCR_cycles_required=round(library_PCR_cycles_required))%>%
glm(library_PCR_cycles_required ~ tax_group*log(library_input_dna)+sample_type*log(library_input_dna), data = .,family=poisson()) %>%
Anova(.,test.statistic="Wald")
Analysis of Deviance Table (Type II tests)
Response: library_PCR_cycles_required
Df Chisq Pr(>Chisq)
tax_group 4 83.0304 < 2.2e-16 ***
log(library_input_dna) 1 782.5648 < 2.2e-16 ***
sample_type 1 27.0760 1.956e-07 ***
tax_group:log(library_input_dna) 4 69.2767 3.226e-14 ***
log(library_input_dna):sample_type 1 4.2027 0.04036 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(library_PCR_cycles_required > 0) %>%
filter(library_input_dna > 0) %>%
filter(library_input_dna < 200) %>%
mutate(library_PCR_cycles_required=round(library_PCR_cycles_required))%>%
glm(library_PCR_cycles_required ~ tax_group*log(library_input_dna)+sample_type*log(library_input_dna), data = .,family=poisson()) %>%
plot_model(.,type="pred",terms=c("library_input_dna[1:200]","sample_type"),show.data = TRUE,dot.size = 1,line.size = 1)+
scale_color_manual(values = c("#bdca50", "#AA3377")) +
scale_fill_manual(values = c("#bdca5050", "#AA337750")) +
labs(y="Required number of cycles",x="Amount of inputted DNA (ng)", color="Sample type") +
theme_classic()
ggsave("figures/cycles_type.pdf",width=6, height=3, units="in")
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(library_PCR_cycles_required > 0) %>%
filter(library_input_dna > 0) %>%
filter(library_input_dna < 200) %>%
mutate(library_PCR_cycles_required=round(library_PCR_cycles_required))%>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
glm(library_PCR_cycles_required ~ tax_group*log(library_input_dna)+sample_type*log(library_input_dna), data = .,family=poisson()) %>%
plot_model(.,type="pred",terms=c("library_input_dna [1:200]","tax_group"),show.data = TRUE,dot.size = 1,line.size = 1)+
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
labs(y="Required number of cycles",x="Amount of inputted DNA (ng)", color="Sample type", fill="Sample type") +
labs(y="Required number of cycles",x="Amount of inputted DNA (ng)", color="Taxonomic group") +
theme_classic()
4.3 Data quality
left_join(read_tsv("data/preprocessing.tsv"),read_tsv("data/sample.tsv"),by="sample_id") %>%
left_join(read_tsv("data/sequence.tsv"),by="sequence_id") %>%
left_join(read_tsv("data/index.tsv"),by="index_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(lowqual_perc_reads=(1-reads_post_fastp/reads_pre_fastp)*100) %>%
mutate(lowqual_perc_bases=(1-bases_post_fastp/bases_pre_fastp)*100) %>%
#Plot map EE6677 < bats
ggplot(aes(y=lowqual_perc_bases, x=index_PCR_cycles_given, colour=sample_type, fill=sample_type, group=sample_type)) +
geom_jitter(alpha=0.3) +
stat_smooth(method = "gam", formula = y ~ s(x, bs = "ps",k=4),se=FALSE, geom = "smooth", alpha=0.2) +
scale_color_manual(values = c("#bdca50", "#AA3377")) +
scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
scale_x_log10() +
theme_classic() +
theme(legend.position = "bottom")
left_join(read_tsv("data/preprocessing.tsv"),read_tsv("data/sample.tsv"),by="sample_id") %>%
left_join(read_tsv("data/sequence.tsv"),by="sequence_id") %>%
left_join(read_tsv("data/index.tsv"),by="index_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(lowqual_perc_reads=(1-reads_post_fastp/reads_pre_fastp)*100) %>%
mutate(lowqual_perc_bases=(1-bases_post_fastp/bases_pre_fastp)*100) %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=lowqual_perc_bases, x=index_PCR_cycles_given, colour=tax_group, fill=tax_group, group=tax_group)) +
geom_jitter(alpha=0.3) +
stat_smooth(method = "gam", formula = y ~ s(x, bs = "ps",k=4),se=FALSE, geom = "smooth", alpha=0.2) +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
scale_x_log10() +
theme_classic() +
theme(legend.position = "bottom")
4.4 Data duplication
left_join(read_tsv("data/preprocessing.tsv"),read_tsv("data/sample.tsv"),by="sample_id") %>%
left_join(read_tsv("data/sequence.tsv"),by="sequence_id") %>%
left_join(read_tsv("data/index.tsv"),by="index_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(lowqual_perc_reads=(1-reads_post_fastp/reads_pre_fastp)*100) %>%
mutate(lowqual_perc_bases=(1-bases_post_fastp/bases_pre_fastp)*100) %>%
#Plot map EE6677 < bats
ggplot(aes(y=host_duplicate_fraction, x=index_PCR_cycles_given, colour=sample_type, fill=sample_type, group=sample_type)) +
geom_jitter(alpha=0.3) +
stat_smooth(method = "gam", formula = y ~ s(x, bs = "ps",k=4),se=FALSE, geom = "smooth", alpha=0.2) +
scale_color_manual(values = c("#bdca50", "#AA3377")) +
scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
scale_x_log10() +
theme_classic() +
theme(legend.position = "bottom")
left_join(read_tsv("data/preprocessing.tsv"),read_tsv("data/sample.tsv"),by="sample_id") %>%
left_join(read_tsv("data/sequence.tsv"),by="sequence_id") %>%
left_join(read_tsv("data/index.tsv"),by="index_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(lowqual_perc_reads=(1-reads_post_fastp/reads_pre_fastp)*100) %>%
mutate(lowqual_perc_bases=(1-bases_post_fastp/bases_pre_fastp)*100) %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=host_duplicate_fraction, x=index_PCR_cycles_given, colour=tax_group, fill=tax_group, group=tax_group)) +
geom_jitter(alpha=0.3) +
stat_smooth(method = "gam", formula = y ~ s(x, bs = "ps",k=4),se=FALSE, geom = "smooth", alpha=0.2) +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
scale_x_log10() +
theme_classic() +
theme(legend.position = "bottom")