3 Sequencing data

3.1 Sequencing depth

tibble(metric=c("Total GB", "Total reads", "Average GB", "Average reads"),
       value=unlist(c(round(all_data %>% summarise(sum(bases_pre_fastp)) / 1000000000,2),
               round(all_data %>% summarise(sum(bases_pre_fastp)) / 300,2),
               paste0(round(all_data %>% summarise(mean(bases_pre_fastp)) / 1000000000,2),"±",round(all_data %>% summarise(sd(bases_pre_fastp)) / 1000000000,2)),
               paste0(round(all_data %>% summarise(mean(bases_pre_fastp)) / 300,0),"±",round(all_data %>% summarise(sd(bases_pre_fastp)) / 300,0))))
       ) %>%
  tt()
metric value
Total GB 429.1
Total reads 1430317433
Average GB 5.36±2.7
Average reads 17878968±9011624
all_data %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(bases_post_fastp / 1000000000), sd(bases_post_fastp / 1000000000))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of sequencing depth (GB)")
Mean and standard deviation of sequencing depth (GB)
Taxon REF DREX1 DREX2
Amphibian 4.0±1.5 4.2±2.3 4.7±0.4
Reptile 6.1±2.3 5.7±1.4 5.0±1.9
Mammal 5.5±3.3 4.6±2.2 3.8±2.5
Bird 3.7±1.9 3.4±2.5 2.7±1.9
Control 0.0±0.0 0.5±0.6 2.1±2.7
all_data %>%
    select(Library,Species,Extraction,bases_pre_fastp,Taxon) %>%
    mutate(bases_pre_fastp=bases_pre_fastp/1000000000) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=bases_pre_fastp, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Sequencing depth (Gb)",x="Extraction method")

all_data  %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(bases_post_fastp ~ Taxon + Extraction + (1 | Species/Sample), data = ., REML = TRUE) %>%
    plot()

all_data  %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(bases_post_fastp ~ Taxon + Extraction + (1 | Species/Sample), data = ., REML = TRUE) %>%
    anova()
Type III Analysis of Variance Table with Satterthwaite's method
               Sum Sq    Mean Sq NumDF DenDF F value Pr(>F)
Taxon      2.0153e+19 6.7177e+18     3     8  2.2598 0.1587
Extraction 7.4191e+18 3.7095e+18     2   Inf  1.2479 0.2871
all_data  %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(bases_post_fastp ~ Taxon + Extraction + (1 | Species/Sample), data = ., REML = TRUE) %>%
    broom.mixed::tidy() %>%
    tt()
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 4671932550 698716103 6.6864532 1.158464e+01 2.665888e-05
fixed NA TaxonReptile 1321944147 900698455 1.4676878 7.997188e+00 1.803849e-01
fixed NA TaxonMammal 363028694 900698455 0.4030524 7.997188e+00 6.974701e-01
fixed NA TaxonBird -994816271 900698455 -1.1044943 7.997188e+00 3.014989e-01
fixed NA ExtractionDREX1 -386734817 497720822 -0.7770115 6.659800e+18 4.371520e-01
fixed NA ExtractionDREX2 -786257695 497720822 -1.5797163 3.916756e+18 1.141719e-01
ran_pars Sample:Species sd__(Intercept) 1178495447 NA NA NA NA
ran_pars Species sd__(Intercept) 164343450 NA NA NA NA
ran_pars Residual sd__Observation 1724155502 NA NA NA NA
all_data  %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(bases_post_fastp ~ Taxon + Extraction + (1 | Species/Sample), data = ., REML = TRUE) %>%
    plot_model(.,type="pred",terms=c("Taxon","Extraction"))

all_data  %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(bases_post_fastp ~ Taxon + Extraction + (1 | Species/Sample), data = ., REML = TRUE) %>%
    plot_model(.,type="pred",terms=c("Species"),pred.type = "re",ci.lvl = NA)+
        theme(axis.text.x = element_text(angle = 45, hjust = 1))

all_data  %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(bases_post_fastp ~ Taxon + Extraction + (1 | Species/Sample), data = ., REML = TRUE) %>%
    r.squaredGLMM()
           R2m      R2c
[1,] 0.1544544 0.427248

3.2 Quality-filtering

all_data %>%
    mutate(qf_bases=bases_post_fastp/bases_pre_fastp*100) %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(qf_bases), sd(qf_bases))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of quality-filtered proportion of reads")
Mean and standard deviation of quality-filtered proportion of reads
Taxon REF DREX1 DREX2
Amphibian 84.6±1.5 91.1±4.8 87.5±4.0
Reptile 89.9±6.6 90.5±7.4 88.3±7.6
Mammal 92.2±2.4 89.0±5.1 91.3±1.8
Bird 70.6±16.5 62.5±29.3 66.7±19.3
Control 3.3±2.3 9.8±11.5 27.5±3.4
all_data %>%
    mutate(qf_bases=bases_post_fastp/bases_pre_fastp*100) %>%
    select(Library,Species,Extraction,qf_bases,Taxon) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=qf_bases, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="High-quality data (%)",x="Extraction method")

data<-all_data  %>%
    mutate(qf_bases=bases_post_fastp/bases_pre_fastp,unique_sample=factor(1:nrow(all_data))) %>%
    filter(Taxon != "Control") 

M <-MASS::glmmPQL(qf_bases~ Taxon+Extraction,random=~1|Species/Sample,
               family="quasibinomial",data=data)

Anova_table<-car::Anova(M,test.statistic="Wald",type="III")%>%
    mutate(F=Chisq/Df) # Approximate F value
n <- nrow(M$data)  # Total number of observations
df_fixed <- length(fixef(M))  # Number of fixed effect parameters
df_random <- length(ranef(M))  # Number of random effect parameters (or levels)
approx_residual_df <- n - df_fixed - df_random # Approx. residual Df
data.frame(Anova_table,
           p_value=pf(Anova_table$F, Anova_table$Df, approx_residual_df, lower.tail = FALSE))%>%
  select(F,p_value)
                     F      p_value
(Intercept) 74.7499062 2.356732e-12
Taxon       14.3961730 2.851930e-07
Extraction   0.1329218 8.757746e-01
broom.mixed::tidy(M) %>%
    tt()
effect term estimate std.error df statistic p.value
fixed (Intercept) 2.02494956 0.2446263 46 8.2777260 1.155959e-10
fixed TaxonReptile 0.18760837 0.3161277 8 0.5934575 5.692695e-01
fixed TaxonMammal 0.32238134 0.3215166 8 1.0026896 3.453708e-01
fixed TaxonBird -1.25655677 0.2879146 8 -4.3643388 2.398635e-03
fixed ExtractionDREX1 -0.08597233 0.1846303 46 -0.4656458 6.436663e-01
fixed ExtractionDREX2 -0.07012683 0.1850391 46 -0.3789838 7.064436e-01
plot_model(M,type="pred",terms=c("Taxon","Extraction"))