5 Host genomics

5.1 Host DNA fraction

5.1.1 Data overview

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(host_percentage= host_bases/bases_post_fastp*100)  %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(sample_type) %>%
    summarise(mean=mean(host_percentage, na.rm=T),
              sd=sd(host_percentage, na.rm=T),
              median=median(host_percentage, na.rm = TRUE),
              IQR=IQR(host_percentage, na.rm = TRUE)) %>% 
    tt()
tinytable_svkru5cfbuf1dulh9h1t
sample_type mean sd median IQR
Anal/cloacal swab 75.76413 32.05899 89.31666 36.46366
Faecal 22.94532 32.09473 6.09630 31.88080
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(host_percentage= host_bases/bases_post_fastp*100)  %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(tax_group) %>%
    summarise(mean=mean(host_percentage, na.rm=T),
              sd=sd(host_percentage, na.rm=T),
              median=median(host_percentage, na.rm = TRUE),
              IQR=IQR(host_percentage, na.rm = TRUE)) %>% 
    tt()
tinytable_s2qais63of820y0jgggx
tax_group mean sd median IQR
Amphibians 0.2968512 1.367301 0.02128969 0.05763236
Bats 49.4004705 37.779131 41.34831756 86.63187989
Birds 58.5039633 38.603463 70.50510687 81.48351233
Mammals 29.5029544 36.464343 8.99683115 50.10072047
Reptiles 12.4971018 22.008872 3.48495016 10.58589423

5.1.2 Statistical test

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(host_percentage= host_bases/bases_post_fastp*100)  %>% 
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    glm(cbind(host_bases,bases_post_fastp) ~ sample_type + tax_group, data = .,family=quasibinomial)  %>%
    Anova(.,test="F",type="III") %>%
    tidy()%>%
    tt()
tinytable_mm2z801voefvsmp03fnh
term sumsq df F.values p.value
sample_type 4.573093e+11 1 508.6208 1.241797e-100
tax_group 8.316384e+11 4 231.2377 1.420289e-163
Residuals 1.815316e+12 2019 NA NA
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(sample_type = factor(sample_type),tax_group=factor(tax_group)) %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    glm(cbind(host_bases,bases_post_fastp) ~ sample_type + tax_group, data = .,family=quasibinomial)  %>%
  glht(.,linfct = mcp(tax_group = "Tukey"))%>%
  summary()%>%
  tidy()%>%
  tt()
tinytable_2hjla6vp8dss042pyi3s
term contrast null.value estimate std.error statistic adj.p.value
tax_group Bats - Amphibians 0 5.1412163 0.68642282 7.489868 3.935741e-13
tax_group Birds - Amphibians 0 4.9599164 0.68662378 7.223630 1.793010e-12
tax_group Mammals - Amphibians 0 4.4582788 0.68642685 6.494907 3.172528e-10
tax_group Reptiles - Amphibians 0 3.6999420 0.68797728 5.378000 6.455164e-07
tax_group Birds - Bats 0 -0.1812999 0.05728337 -3.164966 1.009358e-02
tax_group Mammals - Bats 0 -0.6829374 0.05466064 -12.494135 0.000000e+00
tax_group Reptiles - Bats 0 -1.4412742 0.07106532 -20.280979 0.000000e+00
tax_group Mammals - Birds 0 -0.5016376 0.05073241 -9.887911 0.000000e+00
tax_group Reptiles - Birds 0 -1.2599743 0.07254355 -17.368522 0.000000e+00
tax_group Reptiles - Mammals 0 -0.7583368 0.07074971 -10.718585 0.000000e+00

5.1.3 Plot

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(host_percentage= host_bases/bases_post_fastp*100)  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(., aes(y=host_percentage, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
        geom_jitter(alpha = 0.2, width=0.3) +
        geom_boxplot(outlier.shape = NA) + 
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        facet_grid(~sample_type) +
        labs(y="Host percentage", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/hostdata_taxa.pdf",width=9, height=4, units="in")
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    mutate(host_percentage= host_bases/bases_post_fastp*100)  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(., aes(y=host_percentage, x=sample_type, group=sample_type)) +
        stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
        theme_classic() +
        labs(y="Host percentage", color="Taxa", fill="Taxa") +
        theme_classic()

ggsave("figures/hostdata_taxa_all.pdf",width=9, height=4, units="in")

5.2 Genome depth

5.2.1 Data overview

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/reference.tsv"),by="reference_id") %>% 
    mutate(depth=host_bases/(reference_size*1000000))  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(sample_type) %>%
    summarise(mean=mean(depth, na.rm=T),
              sd=sd(depth, na.rm=T),
              median=median(depth, na.rm = TRUE),
              IQR=IQR(depth, na.rm = TRUE)) %>% 
    tt()
tinytable_2ouub0r942sqos4a3jhd
sample_type mean sd median IQR
Anal/cloacal swab 2.4220586 2.079382 1.9656642 1.9114049
Faecal 0.6573435 1.628490 0.1301568 0.6486201
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/reference.tsv"),by="reference_id") %>% 
    mutate(depth=host_bases/(reference_size*1000000))  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(tax_group) %>%
    summarise(mean=mean(depth, na.rm=T),
              sd=sd(depth, na.rm=T),
              median=median(depth, na.rm = TRUE),
              IQR=IQR(depth, na.rm = TRUE)) %>% 
    tt()
tinytable_nzceqla8raok62l4fzq5
tax_group mean sd median IQR
Amphibians 0.001921062 0.01314311 0.0000433766 0.0001050921
Bats 1.246383229 1.39021316 0.9629904655 1.6487318021
Birds 2.478493853 3.08305053 1.9449031457 2.7817309257
Mammals 0.595230910 1.27474674 0.1327805697 0.6743426328
Reptiles 0.345241814 0.60682463 0.1076937456 0.3145671900

5.2.2 Statistical test

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/reference.tsv"),by="reference_id") %>% 
    mutate(depth=host_bases/(reference_size*1000000))  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    lm(rank(depth) ~ sample_type + tax_group, data = .)  %>%
    Anova(type = "III") %>%
    tidy()%>%
    tt()
tinytable_bl3x8di5einvhjhxcife
term sumsq df statistic p.value
(Intercept) 45259288 1 255.3583 2.982539e-54
sample_type 75036348 1 423.3640 1.215557e-85
tax_group 208078989 4 293.5016 1.475300e-199
Residuals 361920741 2042 NA NA
left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/reference.tsv"),by="reference_id") %>% 
    mutate(depth=host_bases/(reference_size*1000000))  %>% #convert bases to gigabases (GB)
    mutate(sample_type = factor(sample_type),tax_group=factor(tax_group)) %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    lm(rank(depth) ~ sample_type + tax_group, data = .)  %>%
    glht(.,linfct = mcp(tax_group = "Tukey"))%>%
    summary()%>%
    tidy()%>%
    tt()
tinytable_j11q4xd4t824w7yfhdz7
term contrast null.value estimate std.error statistic adj.p.value
tax_group Bats - Amphibians 0 1208.81726 39.35163 30.718356 0.0000000
tax_group Birds - Amphibians 0 1131.10292 41.03771 27.562527 0.0000000
tax_group Mammals - Amphibians 0 717.21446 36.74653 19.517881 0.0000000
tax_group Reptiles - Amphibians 0 744.78843 37.68408 19.764006 0.0000000
tax_group Birds - Bats 0 -77.71434 33.53182 -2.317630 0.1351404
tax_group Mammals - Bats 0 -491.60281 27.93331 -17.599160 0.0000000
tax_group Reptiles - Bats 0 -464.02884 29.08689 -15.953197 0.0000000
tax_group Mammals - Birds 0 -413.88846 28.68250 -14.429998 0.0000000
tax_group Reptiles - Birds 0 -386.31450 31.32238 -12.333498 0.0000000
tax_group Reptiles - Mammals 0 27.57397 25.44895 1.083501 0.8102494

5.2.3 Plot

left_join(read_tsv("data/preprocessing.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>%
    left_join(read_tsv("data/reference.tsv"),by="reference_id") %>% 
    mutate(depth=host_bases/(reference_size*1000000))  %>% #convert bases to gigabases (GB)
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(., aes(y=depth, x=sample_type, color=sample_type, fill=sample_type, group=sample_type)) +
        ylim(0,10)+
        geom_boxplot(outlier.shape = NA) + 
        scale_color_manual(values = c("#bdca50", "#AA3377")) +   
        scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
        theme_classic() +
        labs(y="Host depth of coverage", color="Sample type", fill="Sample type") +
        theme_classic()

ggsave("figures/hostdepth_type.pdf",width=5, height=4, units="in")