# Count the rows of `tdt` that share each TotalCounts value, then accumulate
# those counts in ascending TotalCounts order.
taxcumsum <- tdt[, .N, by = TotalCounts]
setkeyv(taxcumsum, "TotalCounts")  # sorts by reference and keys the table
taxcumsum[, CumSum := cumsum(N)]

# Base scatter plot: running total (y) against the count threshold (x).
pCumSum <- ggplot(taxcumsum, aes(TotalCounts, CumSum)) +
  geom_point() +
  theme_bw() +
  labs(x = "Filtering Threshold", y = "ASV Filtered")

# 2 x 2 grid: the full x range plus three progressively tighter x windows.
gridExtra::grid.arrange(
  grobs = list(
    pCumSum,
    pCumSum + xlim(0, 500),
    pCumSum + xlim(0, 100),
    pCumSum + xlim(0, 50)
  ),
  nrow = 2,
  top = "ASVs that would be filtered vs. minimum taxa counts threshold"
)
# Melt `ps` into a long table with the helper fast_melt() (defined elsewhere;
# presumably `ps` is a phyloseq object -- confirm). The columns used below are
# taxaID, SampleID and count.
mdt <- fast_melt(ps)

# Keep only observed, strictly positive counts. data.table already treats NA
# in `i` as FALSE; the explicit is.na() check just makes that intent visible.
mdt <- mdt[!is.na(count) & count > 0]

# Relative abundance = a taxon's share of the total counts in its own sample,
# so the denominator is grouped by SampleID. Grouping by taxaID instead divides
# each count by that taxon's own total, which is 1 for every taxon that occurs
# in a single sample.
# NOTE(review): any printout captured before this change (RelativeAbundance
# equal to 1 in every row) reflects the old per-taxon grouping.
mdt[, RelativeAbundance := count / sum(count), by = SampleID]
mdt
Kingdom Phylum Class Order Family
1: Bacteria Proteobacteria Gammaproteobacteria Methylococcales Methylomonaceae
2: Bacteria Proteobacteria Gammaproteobacteria Betaproteobacteriales SC-I-84
3: Bacteria Proteobacteria Deltaproteobacteria Syntrophobacterales Syntrophobacteraceae
4: Bacteria <NA> <NA> <NA> <NA>
5: Bacteria Proteobacteria Gammaproteobacteria Steroidobacterales Steroidobacteraceae
---
1043: Bacteria Proteobacteria <NA> <NA> <NA>
1044: Bacteria Actinobacteria Actinobacteria <NA> <NA>
1045: Bacteria Proteobacteria Deltaproteobacteria <NA> <NA>
1046: Bacteria Proteobacteria Gammaproteobacteria Steroidobacterales Steroidobacteraceae
1047: Bacteria Bacteroidetes Bacteroidia Bacteroidales Prolixibacteraceae
Genus taxaID SampleID count RelativeAbundance
1: Crenothrix OTU1 sa1 332 1
2: <NA> OTU10 sa1 147 1
3: Syntrophobacter OTU10 sa1 49 1
4: <NA> OTU1000 sa1 1 1
5: <NA> OTU1001 sa1 1 1
---
1043: <NA> OTU995 sa1 1 1
1044: <NA> OTU996 sa1 1 1
1045: <NA> OTU997 sa1 1 1
1046: <NA> OTU998 sa1 1 1
1047: <NA> OTU999 sa1 1 1