Chapter 7 PacBio selection

load("data/data.Rdata")

7.1 Read fraction

microbial_fraction <- read_tsv("data/microbial_fraction.tsv") %>%
  mutate(sample=str_replace_all(sample,"\\.lib1_1", "")) %>%
  mutate(read_fraction=str_remove(read_fraction,"%") %>% as.numeric())

7.2 Phylogenetic diversity

phylogenetic_diversity <- genome_counts %>% 
            column_to_rownames(var="genome") %>% 
            select(where(~!all(. == 0))) %>% 
            hilldiv(.,q=1,tree=genome_tree) %>% 
            t() %>% 
            as.data.frame() %>%
            rename(phylogenetic=1) %>%
            rownames_to_column(var="sample")

7.3 TOP10 candidate samples

#Select top10
selection <- left_join(phylogenetic_diversity,microbial_fraction,by=join_by(sample==sample)) %>%
    arrange(-phylogenetic) %>%
    unique() %>% 
    select(sample) %>%
    slice(1:20) %>%
    pull()

#Print statistics
left_join(phylogenetic_diversity,microbial_fraction,by=join_by(sample==sample)) %>%
    left_join(sample_metadata,by="sample") %>% 
    arrange(-phylogenetic) %>%
    unique() %>% 
    slice(1:20) %>%
    select(sample, phylogenetic, read_fraction, treatment, trial) %>%
    mutate(number_of_genomes=genome_counts_filt %>%
               select(all_of(c("genome",selection))) %>%
               summarise(across(starts_with("D"), ~ sum(. != 0))) %>% t()) %>%
    rename(phylogenetic_diversity=phylogenetic,microbial_fraction=read_fraction) %>%
    tt()
tinytable_e78vnbv6ahhhokj29irf
sample phylogenetic_diversity microbial_fraction treatment trial number_of_genomes
D300670 10.830700 60.88 T3 I 340
D300644 10.509730 37.41 T1 I 150
D300666 9.184970 62.57 T3 I 170
D300665 9.047756 49.03 T1 I 206
D300625 9.039263 66.57 T1 I 259
D300657 8.981349 66.28 T3 K 289
D300651 8.966840 59.94 T1 K 401
D300639 8.699889 60.57 T3 I 272
D300635 8.536139 61.39 T1 I 280
D300648 8.470311 66.89 T3 I 288
D300656 8.433429 42.48 T1 K 248
D300629 8.422548 55.78 T3 K 260
D300637 8.167404 62.95 T3 I 308
D300664 8.051396 68.78 T3 I 321
D300654 8.021459 63.11 T1 K 333
D300659 8.007220 69.24 T3 K 370
D300660 7.967956 62.56 T3 K 332
D300672 7.960063 61.17 T0 K 50
D300658 7.926715 58.91 T3 K 337
D300653 7.807393 69.50 T1 K 327

All the samples with top phylogenetic diversity metrics have similar microbial fraction.

vertical_tree <- force.ultrametric(genome_tree,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
  arrange(match(genome, genome_tree$tip.label)) %>%
  select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

vertical_tree <- gheatmap(vertical_tree, phylum_colors, offset=-0.6, width=0.1, colnames=FALSE) +
    scale_fill_manual(values=colors_alphabetic) +
    new_scale_fill()

#Add genome counts of d0
genome_counts_selection <- genome_counts_filt %>%
          select(all_of(c("genome",selection))) %>% 
          column_to_rownames(var="genome") %>% tss()

vertical_tree <- gheatmap(vertical_tree, log10(genome_counts_selection), offset=-0.4, width=0.3, colnames=TRUE, colnames_angle=90, font.size=3, colnames_position="top", colnames_offset_y = 15) +
    vexpand(.08) +
    coord_cartesian(clip = "off") +
    scale_fill_gradient(low = "lightblue", high = "#315b7d", na.value="#f4f4f4") +
    new_scale_fill()

vertical_tree +
  theme(legend.position='none')

Top 10 diversity samples are sorted from left to right.