6 Microbial metagenomics
6.1 Microbial DNA fraction
6.1.1 Data overview
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(sample_type) %>%
summarise(mean=mean(singlem_fraction, na.rm=T),
sd=sd(singlem_fraction, na.rm=T),
median=median(singlem_fraction, na.rm = TRUE),
IQR=IQR(singlem_fraction, na.rm = TRUE)) %>%
tt()
sample_type | mean | sd | median | IQR |
---|---|---|---|---|
Anal/cloacal swab | 0.2598510 | 0.3407707 | 0.0022 | 0.574700 |
Faecal | 0.5203527 | 0.2759433 | 0.6270 | 0.350875 |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(tax_group) %>%
summarise(mean=mean(singlem_fraction, na.rm=T),
sd=sd(singlem_fraction, na.rm=T),
median=median(singlem_fraction, na.rm = TRUE),
IQR=IQR(singlem_fraction, na.rm = TRUE)) %>%
tt()
tax_group | mean | sd | median | IQR |
---|---|---|---|---|
Amphibians | 0.5807892 | 0.1799217 | 0.62900 | 0.128200 |
Bats | 0.2903304 | 0.2949997 | 0.20650 | 0.523400 |
Birds | 0.2777113 | 0.3360460 | 0.07850 | 0.556025 |
Mammals | 0.5871579 | 0.2841097 | 0.67420 | 0.175250 |
Reptiles | 0.5885336 | 0.1701545 | 0.63475 | 0.114050 |
6.1.2 Statistical test
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(!is.na(singlem_fraction))%>%
mutate(singlem_fraction=case_when(singlem_fraction>1~1,
singlem_fraction<=1~singlem_fraction))%>%
glm(singlem_fraction ~ tax_group + sample_type, data = .,family=quasibinomial) %>%
Anova(.,test.statistic = "F",type = "III") %>%
tidy()%>%
tt()
term | sumsq | df | F.values | p.value |
---|---|---|---|---|
tax_group | 148.8863 | 4 | 119.1247 | 2.466079e-91 |
sample_type | 51.7346 | 1 | 165.5725 | 1.784789e-36 |
Residuals | 630.8543 | 2019 | NA | NA |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(!is.na(singlem_fraction))%>%
mutate(singlem_fraction=case_when(singlem_fraction>1~1,
singlem_fraction<=1~singlem_fraction),
sample_type=factor(sample_type),
tax_group=factor(tax_group))%>%
glm(singlem_fraction ~ tax_group + sample_type, data = .,family=quasibinomial) %>%
glht(.,linfct = mcp(tax_group = "Tukey"))%>%
summary()%>%
tidy()%>%
tt()
term | contrast | null.value | estimate | std.error | statistic | adj.p.value |
---|---|---|---|---|---|---|
tax_group | Bats - Amphibians | 0 | -1.25097357 | 0.10967066 | -11.4066383 | 0.000000e+00 |
tax_group | Birds - Amphibians | 0 | -0.88809563 | 0.11334382 | -7.8354131 | 2.187139e-14 |
tax_group | Mammals - Amphibians | 0 | 0.16650972 | 0.10026122 | 1.6607589 | 4.506772e-01 |
tax_group | Reptiles - Amphibians | 0 | 0.03298124 | 0.10207377 | 0.3231118 | 9.975622e-01 |
tax_group | Birds - Bats | 0 | 0.36287794 | 0.09724522 | 3.7315759 | 1.813404e-03 |
tax_group | Mammals - Bats | 0 | 1.41748328 | 0.08113495 | 17.4706869 | 0.000000e+00 |
tax_group | Reptiles - Bats | 0 | 1.28395481 | 0.08304328 | 15.4612727 | 0.000000e+00 |
tax_group | Mammals - Birds | 0 | 1.05460535 | 0.08202966 | 12.8563901 | 0.000000e+00 |
tax_group | Reptiles - Birds | 0 | 0.92107687 | 0.08779908 | 10.4907351 | 0.000000e+00 |
tax_group | Reptiles - Mammals | 0 | -0.13352848 | 0.07012718 | -1.9040901 | 3.080509e-01 |
6.1.3 Plot
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
mutate(host_percentage= host_bases/bases_post_fastp*100) %>% #convert bases to gigabases (GB)
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=singlem_fraction, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(0, 1) +
geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
facet_grid(~sample_type) +
labs(y="Host percentage", color="Taxa", fill="Taxa") +
theme_classic()
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
mutate(host_percentage= host_bases/bases_post_fastp*100) %>% #convert bases to gigabases (GB)
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=singlem_fraction, x=sample_type, group=sample_type)) +
stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
theme_classic() +
labs(y="Host percentage", color="Taxa", fill="Taxa") +
theme_classic()
ggsave("figures/microbialdata_taxa_all.pdf",width=9, height=4, units="in")
6.2 Domain-adjusted mapping rate
6.2.1 Data summary
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
filter(singlem_fraction>0) %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(sample_type) %>%
summarise(mean=mean(damr, na.rm=T),
sd=sd(damr, na.rm=T),
median=median(damr, na.rm = TRUE),
IQR=IQR(damr, na.rm = TRUE)) %>%
tt()
sample_type | mean | sd | median | IQR |
---|---|---|---|---|
Anal/cloacal swab | 72.84533 | 22.25244 | 76.29903 | 30.61081 |
Faecal | 86.58154 | 25.00932 | 100.00000 | 15.61039 |
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
filter(singlem_fraction>0) %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(tax_group) %>%
summarise(mean=mean(damr, na.rm=T),
sd=sd(damr, na.rm=T),
median=median(damr, na.rm = TRUE),
IQR=IQR(damr, na.rm = TRUE)) %>%
tt()
tax_group | mean | sd | median | IQR |
---|---|---|---|---|
Amphibians | 89.19356 | 24.84575 | 100.00000 | 5.927094 |
Bats | 80.94051 | 24.67483 | 91.76713 | 30.814236 |
Birds | 67.03815 | 24.99000 | 72.18568 | 29.811183 |
Mammals | 91.06092 | 16.77450 | 100.00000 | 11.191039 |
Reptiles | 84.52325 | 31.14150 | 100.00000 | 10.748612 |
6.2.2 Statistical test
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,11,singlem_fraction)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,1,mapping_percentage/singlem_fraction)) %>%
filter(singlem_fraction>0) %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(!is.na(damr)) %>%
glm(damr ~ sample_type + tax_group, data = .,family=quasibinomial) %>%
Anova(test.statistic = "F",type="III") %>%
tidy()%>%
tt()
term | sumsq | df | F.values | p.value |
---|---|---|---|---|
sample_type | 13.49334 | 1 | 19.83706 | 8.794973e-06 |
tax_group | 76.64409 | 4 | 28.16933 | 6.244313e-23 |
Residuals | 1742.01471 | 2561 | NA | NA |
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,11,singlem_fraction)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,1,mapping_percentage/singlem_fraction)) %>%
filter(singlem_fraction>0) %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(!is.na(damr)) %>%
mutate(sample_type=factor(sample_type),
tax_group=factor(tax_group))%>%
glm(damr ~ sample_type + tax_group, data = .,family=quasibinomial) %>%
glht(.,linfct = mcp(tax_group = "Tukey"))%>%
summary()%>%
tidy()%>%
tt()
term | contrast | null.value | estimate | std.error | statistic | adj.p.value |
---|---|---|---|---|---|---|
tax_group | Bats - Amphibians | 0 | -1.5736540 | 1.1220474 | -1.4024844 | 5.929805e-01 |
tax_group | Birds - Amphibians | 0 | -2.7097758 | 1.0700747 | -2.5323240 | 7.226328e-02 |
tax_group | Mammals - Amphibians | 0 | 0.3210380 | 1.1686087 | 0.2747181 | 9.985223e-01 |
tax_group | Reptiles - Amphibians | 0 | -3.2127466 | 1.0255111 | -3.1328247 | 1.272998e-02 |
tax_group | Birds - Bats | 0 | -1.1361218 | 0.5793468 | -1.9610391 | 2.562934e-01 |
tax_group | Mammals - Bats | 0 | 1.8946920 | 0.7458078 | 2.5404560 | 7.066287e-02 |
tax_group | Reptiles - Bats | 0 | -1.6390926 | 0.4921951 | -3.3301684 | 6.400175e-03 |
tax_group | Mammals - Birds | 0 | 3.0308138 | 0.6650554 | 4.5572350 | 3.762506e-05 |
tax_group | Reptiles - Birds | 0 | -0.5029708 | 0.3582254 | -1.4040625 | 5.917186e-01 |
tax_group | Reptiles - Mammals | 0 | -3.5337846 | 0.5906877 | -5.9824922 | 1.550295e-08 |
6.2.3 Plot
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(singlem_fraction>0) %>%
ggplot(aes(y=damr, x=sample_type, color=sample_type, fill=sample_type, group=sample_type)) +
geom_boxplot(outlier.shape = NA) +
scale_color_manual(values = c("#bdca50", "#AA3377")) +
scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
theme_classic() +
labs(y="DAMR", color="Taxa", fill="Taxa") +
theme_classic()
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>% #convert bases to gigabases (GB)
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(singlem_fraction>0) %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=damr, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
labs(y="DAMR", color="Taxa", fill="Taxa") +
theme_classic()
6.3 Assemblies
6.3.1 Data summary
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
group_by(sample_type) %>%
summarise(mean=mean(assembly_length, na.rm=T),
sd=sd(assembly_length, na.rm=T),
median=median(assembly_length, na.rm = TRUE),
IQR=IQR(assembly_length, na.rm = TRUE)) %>%
tt()
sample_type | mean | sd | median | IQR |
---|---|---|---|---|
Anal/cloacal swab | 13208042 | 34504175 | 0 | 0 |
Faecal | 95955306 | 83010036 | 97047664 | 151385731 |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
group_by(tax_group) %>%
summarise(mean=mean(assembly_length, na.rm=T),
sd=sd(assembly_length, na.rm=T),
median=median(assembly_length, na.rm = TRUE),
IQR=IQR(assembly_length, na.rm = TRUE)) %>%
tt()
tax_group | mean | sd | median | IQR |
---|---|---|---|---|
Amphibians | 116614689 | 68762523 | 121907456 | 88263039 |
Bats | 31069176 | 77144825 | 0 | 41984504 |
Birds | 8688016 | 26409919 | 0 | 0 |
Mammals | 96415766 | 69841084 | 98624268 | 75754888 |
Reptiles | 139349280 | 74190452 | 143970512 | 93046444 |
6.3.2 Statistical test
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
lm(rank(assembly_length) ~ sample_type + tax_group, data = .) %>%
Anova(type="III") %>%
tidy()%>%
tt()
term | sumsq | df | statistic | p.value |
---|---|---|---|---|
(Intercept) | 33267819 | 1 | 314.2467 | 3.650912e-64 |
sample_type | 13028568 | 1 | 123.0674 | 1.427942e-27 |
tax_group | 100440135 | 4 | 237.1885 | 5.790079e-159 |
Residuals | 163985333 | 1549 | NA | NA |
6.3.3 Plot
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=assembly_length, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(0,400000000)+
geom_jitter(alpha = 0.3, width=0.3, size=0.5) +
geom_violin() +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
facet_grid(~sample_type, scale="free") +
labs(y="Assembly size", color="Taxa", fill="Taxa") +
theme_classic()
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=assembly_length, x=sample_type, group=sample_type)) +
ylim(0,400000000)+
stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
theme_classic() +
labs(y="Assembly size", color="Taxa", fill="Taxa") +
theme_classic()
ggsave("figures/assemblysize_taxa_all.pdf",width=9, height=4, units="in")
6.4 Number of MAGs
6.4.1 Summary statistics
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
group_by(sample_type) %>%
summarise(mean=mean(assembly_num_bins, na.rm=T),
sd=sd(assembly_num_bins, na.rm=T),
median=median(assembly_num_bins, na.rm = TRUE),
IQR=IQR(assembly_num_bins, na.rm = TRUE)) %>%
tt()
sample_type | mean | sd | median | IQR |
---|---|---|---|---|
Anal/cloacal swab | 2.644068 | 6.971768 | 0 | 0 |
Faecal | 18.386792 | 16.099835 | 19 | 29 |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
group_by(tax_group) %>%
summarise(mean=mean(assembly_num_bins, na.rm=T),
sd=sd(assembly_num_bins, na.rm=T),
median=median(assembly_num_bins, na.rm = TRUE),
IQR=IQR(assembly_num_bins, na.rm = TRUE)) %>%
tt()
tax_group | mean | sd | median | IQR |
---|---|---|---|---|
Amphibians | 23.758389 | 14.944334 | 24 | 18 |
Bats | 3.692308 | 7.451098 | 0 | 5 |
Birds | 1.582979 | 5.600340 | 0 | 0 |
Mammals | 22.062635 | 16.345908 | 23 | 19 |
Reptiles | 23.924107 | 13.736366 | 24 | 18 |
6.4.2 Statistical test
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
glm(assembly_num_bins ~ sample_type + tax_group, data = .,family=quasipoisson) %>%
Anova(test.statistic = "F",type = "III") %>%
tidy()%>%
tt()
term | sumsq | df | F.values | p.value |
---|---|---|---|---|
sample_type | 1781.293 | 1 | 131.4547 | 2.875375e-29 |
tax_group | 9870.795 | 4 | 182.1096 | 5.599140e-128 |
Residuals | 20989.917 | 1549 | NA | NA |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
mutate(sample_type=factor(sample_type),
tax_group=factor(tax_group))%>%
glm(assembly_num_bins ~ sample_type + tax_group, data = .,family=quasipoisson) %>%
glht(.,linfct = mcp(tax_group = "Tukey"))%>%
summary()%>%
tidy()%>%
tt()
term | contrast | null.value | estimate | std.error | statistic | adj.p.value |
---|---|---|---|---|---|---|
tax_group | Bats - Amphibians | 0 | -1.89252007 | 0.13396130 | -14.12736443 | 0.0000000 |
tax_group | Birds - Amphibians | 0 | -2.37578946 | 0.20160147 | -11.78458429 | 0.0000000 |
tax_group | Mammals - Amphibians | 0 | -0.02494121 | 0.07183869 | -0.34718348 | 0.9962961 |
tax_group | Reptiles - Amphibians | 0 | -0.00222888 | 0.07136120 | -0.03123378 | 0.9999997 |
tax_group | Birds - Bats | 0 | -0.48326938 | 0.22581601 | -2.14010238 | 0.1759353 |
tax_group | Mammals - Bats | 0 | 1.86757887 | 0.12433118 | 15.02100124 | 0.0000000 |
tax_group | Reptiles - Bats | 0 | 1.89029119 | 0.12401954 | 15.24188255 | 0.0000000 |
tax_group | Mammals - Birds | 0 | 2.35084825 | 0.19506107 | 12.05185782 | 0.0000000 |
tax_group | Reptiles - Birds | 0 | 2.37356058 | 0.19518653 | 12.16047329 | 0.0000000 |
tax_group | Reptiles - Mammals | 0 | 0.02271233 | 0.05098722 | 0.44545131 | 0.9903572 |
6.4.3 Plot
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=assembly_num_bins, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(0,100) +
geom_jitter(alpha = 0.6, width=0.3, size=0.5) +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
facet_grid(~sample_type, scale="free") +
labs(y="Number of MAGs", color="Taxa", fill="Taxa") +
theme_classic()
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=assembly_num_bins, x=sample_type, group=sample_type)) +
ylim(0,100) +
stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
theme_classic() +
labs(y="Number of MAGs", color="Taxa", fill="Taxa") +
theme_classic()
6.5 MAG quality
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=mag_completeness, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(50,100) +
#geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5, width = 0.5, .width = 0,normalize = "groups") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
labs(y="Number of bins", color="Taxa", fill="Taxa") +
theme_classic()
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=mag_contamination, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(0,10) +
#geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5, width = 0.5, .width = 0,normalize = "groups") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
labs(y="Number of bins", color="Taxa", fill="Taxa") +
theme_classic()
6.6 Assemblies vs MAGs
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual")%>%
with(cor(assembly_num_bins,assembly_length))
[1] 0.8511555
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=assembly_num_bins, x=assembly_length, color=tax_group)) +
geom_point(alpha=0.5, size=0.5) +
scale_color_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
geom_smooth(method="lm",se=FALSE)+
theme_classic() +
labs(y="Number of bins", x="Assembly length", color="Taxa", fill="Taxa")
6.7 Microbial fraction vs MAGs
microbial_fraction_mag <- left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal")) %>%
filter(assembly_type == "Individual") %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
dplyr::select(assembly_num_bins,singlem_fraction,tax_group)
lm(assembly_num_bins ~ singlem_fraction * tax_group, data = microbial_fraction_mag) %>%
summary() %>%
tidy() %>%
tt()
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 3.12532677 | 4.96020104 | 0.6300807 | 5.287470e-01 |
singlem_fraction | 0.35631843 | 0.07871737 | 4.5265539 | 6.515925e-06 |
tax_groupReptiles | -6.78245261 | 5.90661233 | -1.1482813 | 2.510532e-01 |
tax_groupBirds | -3.86068566 | 5.17541920 | -0.7459658 | 4.558162e-01 |
tax_groupBats | -1.02417632 | 5.13266089 | -0.1995410 | 8.418692e-01 |
tax_groupMammals | 0.77359372 | 5.47676727 | 0.1412501 | 8.876932e-01 |
singlem_fraction:tax_groupReptiles | 0.10319272 | 0.09359558 | 1.1025385 | 2.704216e-01 |
singlem_fraction:tax_groupBirds | -0.28420760 | 0.08484622 | -3.3496788 | 8.310810e-04 |
singlem_fraction:tax_groupBats | -0.31514636 | 0.08324979 | -3.7855513 | 1.600051e-04 |
singlem_fraction:tax_groupMammals | -0.06268436 | 0.08524577 | -0.7353369 | 4.622604e-01 |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal")) %>%
filter(assembly_type == "Individual") %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
ggplot(aes(y=assembly_num_bins, x=singlem_fraction, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(0,100) +
geom_jitter(alpha = 0.6, width=0.3, size=0.5) +
geom_smooth(method="lm") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
labs(y="Number of MAGs", color="Taxa", fill="Taxa") +
theme_classic()
6.8 Microbial fraction vs Nonpareil
microbial_fraction_nonpareil <- left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal")) %>%
filter(assembly_type == "Individual") %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
dplyr::select(nonpareil_coverage,singlem_fraction,tax_group)
lm(nonpareil_coverage ~ singlem_fraction * tax_group, data = microbial_fraction_nonpareil) %>%
summary() %>%
tidy() %>%
tt()
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 0.469265681 | 0.0385474236 | 12.173724 | 1.911756e-32 |
singlem_fraction | 0.005266754 | 0.0006117397 | 8.609470 | 1.986087e-17 |
tax_groupReptiles | 0.197073303 | 0.0459023104 | 4.293320 | 1.884265e-05 |
tax_groupBirds | 0.234670179 | 0.0402199577 | 5.834670 | 6.719289e-09 |
tax_groupBats | 0.220349853 | 0.0398876683 | 5.524260 | 3.955751e-08 |
tax_groupMammals | 0.106761296 | 0.0425618369 | 2.508381 | 1.224354e-02 |
singlem_fraction:tax_groupReptiles | -0.002286717 | 0.0007273633 | -3.143844 | 1.703285e-03 |
singlem_fraction:tax_groupBirds | -0.002925969 | 0.0006593691 | -4.437528 | 9.830169e-06 |
singlem_fraction:tax_groupBats | -0.002763496 | 0.0006469627 | -4.271492 | 2.075836e-05 |
singlem_fraction:tax_groupMammals | -0.001383420 | 0.0006624741 | -2.088263 | 3.695868e-02 |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal")) %>%
filter(assembly_type == "Individual") %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
ggplot(aes(y=nonpareil_coverage, x=singlem_fraction, color=tax_group, fill=tax_group, group=tax_group)) +
geom_jitter(alpha = 0.6, width=0.3, size=0.5) +
geom_smooth(method="lm") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
labs(y="Nonpareil completeness", color="Taxa", fill="Taxa") +
theme_classic()