Chapter 7 PacBio selection
7.3 TOP10 candidate samples
#Select top10
selection <- left_join(phylogenetic_diversity,microbial_fraction,by=join_by(sample==sample)) %>%
arrange(-phylogenetic) %>%
unique() %>%
select(sample) %>%
slice(1:20) %>%
pull()
#Print statistics
left_join(phylogenetic_diversity,microbial_fraction,by=join_by(sample==sample)) %>%
left_join(sample_metadata,by="sample") %>%
arrange(-phylogenetic) %>%
unique() %>%
slice(1:20) %>%
select(sample, phylogenetic, read_fraction, treatment, trial) %>%
mutate(number_of_genomes=genome_counts_filt %>%
select(all_of(c("genome",selection))) %>%
summarise(across(starts_with("D"), ~ sum(. != 0))) %>% t()) %>%
rename(phylogenetic_diversity=phylogenetic,microbial_fraction=read_fraction) %>%
tt()
sample | phylogenetic_diversity | microbial_fraction | treatment | trial | number_of_genomes |
---|---|---|---|---|---|
D300670 | 10.830700 | 60.88 | T3 | I | 340 |
D300644 | 10.509730 | 37.41 | T1 | I | 150 |
D300666 | 9.184970 | 62.57 | T3 | I | 170 |
D300665 | 9.047756 | 49.03 | T1 | I | 206 |
D300625 | 9.039263 | 66.57 | T1 | I | 259 |
D300657 | 8.981349 | 66.28 | T3 | K | 289 |
D300651 | 8.966840 | 59.94 | T1 | K | 401 |
D300639 | 8.699889 | 60.57 | T3 | I | 272 |
D300635 | 8.536139 | 61.39 | T1 | I | 280 |
D300648 | 8.470311 | 66.89 | T3 | I | 288 |
D300656 | 8.433429 | 42.48 | T1 | K | 248 |
D300629 | 8.422548 | 55.78 | T3 | K | 260 |
D300637 | 8.167404 | 62.95 | T3 | I | 308 |
D300664 | 8.051396 | 68.78 | T3 | I | 321 |
D300654 | 8.021459 | 63.11 | T1 | K | 333 |
D300659 | 8.007220 | 69.24 | T3 | K | 370 |
D300660 | 7.967956 | 62.56 | T3 | K | 332 |
D300672 | 7.960063 | 61.17 | T0 | K | 50 |
D300658 | 7.926715 | 58.91 | T3 | K | 337 |
D300653 | 7.807393 | 69.50 | T1 | K | 327 |
All the samples with top phylogenetic diversity metrics have similar microbial fraction.
***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#Add phylum colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
select(phylum)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
vertical_tree <- gheatmap(vertical_tree, phylum_colors, offset=-0.6, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic) +
new_scale_fill()
#Add genome counts of d0
genome_counts_selection <- genome_counts_filt %>%
select(all_of(c("genome",selection))) %>%
column_to_rownames(var="genome") %>% tss()
vertical_tree <- gheatmap(vertical_tree, log10(genome_counts_selection), offset=-0.4, width=0.3, colnames=TRUE, colnames_angle=90, font.size=3, colnames_position="top", colnames_offset_y = 15) +
vexpand(.08) +
coord_cartesian(clip = "off") +
scale_fill_gradient(low = "lightblue", high = "#315b7d", na.value="#f4f4f4") +
new_scale_fill()
vertical_tree +
theme(legend.position='none')
Top 10 diversity samples are sorted from left to right.