This post details test for subsampling thresholds in the Montipora capitata early life history timeseries dataset. The purpose of these tests is to determine the minimum read depth to detect differences between treatment groups for future sequencing runs.
Prepare data
Load metadata.
#create metadata and load
Metadata<-read.csv("Mcap2020/Data/ITS2/ITS2_Metadata.csv")
rownames(Metadata) <- Metadata$sample_name
Metadata <- Metadata %>%
select(code, hpf, group, lifestage)
Metadata <- as.matrix(Metadata)
Metadata <- sample_data(data.frame(Metadata))
Load profile level data.
#load data for taxonomy
tax0 <- read_tsv(
file = "Mcap2020/Data/ITS2/mcap.profiles.absolute.abund_and_meta.txt",
n_max = 6) %>%
dplyr::select(-2) %>%
gather(UID, value, -1) %>%
spread(1, value) %>%
clean_names()
tax1 <- as.matrix(tax0[, -1], dimnames = list(tax0$uid, colnames(tax0[-1])))
rownames(tax1) <- tax0$uid
tax <- tax_table(tax1)
#load data for count matrix
otu0 <- read_tsv(
file = "Mcap2020/Data/ITS2/mcap.profiles.absolute.abund_and_meta.txt", col_names = TRUE)
list<-c("UID", "sample_name", "8", "9")
colnames(otu0)<-list
otu0<-otu0%>%
select(-1) %>%
dplyr::slice(7:n()) %>%
mutate_at(2:ncol(.), as.numeric)
otu1 <- as.matrix(otu0[, -1])
rownames(otu1) <- otu0$sample_name
otu <- otu_table(otu1, taxa_are_rows = FALSE)
#combine to phyloseq object
coral <- phyloseq(otu, tax, Metadata)
coral
Note that taxa 8 is Clade C and taxa 9 is Clade D.
Next load the data for coral strain level information.
#gather the taxonomy names
taxnames <- read_tsv(
file = "Mcap2020/Data/ITS2/mcap.seqs.absolute.abund_only.txt",
n_max = 0) %>%
select(-1) %>%
names(.)
#extract clade letter
tax0 <- data_frame(
DIV = taxnames,
clade = str_extract(DIV, "[A-Z]")
)
tax1 <- as.matrix(tax0)
rownames(tax1) <- tax0$DIV
tax <- tax_table(tax1)
otu0 <- read_tsv(
file = "Mcap2020/Data/ITS2/mcap.seqs.absolute.abund_and_meta.txt") %>%
select(-1, )
otu1 <- as.matrix(otu0[, 37:113])
rownames(otu1) <- otu0$sample_name
otu <- otu_table(otu1, taxa_are_rows = FALSE)
coralDIV <- phyloseq(otu, tax, Metadata)
coralDIV
Now save the object.
save(coral, coralDIV, file = "Mcap2020/Data/ITS2/ITS2_phyloseq.RData")
Build DIV Data
Load the data.
load("Mcap2020/Data/ITS2/ITS2_phyloseq.RData")
View the number of sequence counts in our dataset.
Samps<-read_tsv(file = "Mcap2020/Data/ITS2/mcap.seqs.absolute.abund_and_meta.txt")
hist(Samps$post_taxa_id_absolute_symbiodiniaceae_seqs, breaks=30)
Samps$post_taxa_id_absolute_symbiodiniaceae_seqs
We have >10,551 sequences for all of our samples.
Generate tree.
random_tree = rtree(ntaxa(coralDIV), rooted=TRUE, tip.label=taxa_names(coralDIV))
Merge phyloseq data.
phylo_coral_DIV = merge_phyloseq(coralDIV, Metadata, random_tree)
phylo_coral_DIV
Filter out samples with low numbers in groups (n=1 per group) from phyloseq object.
phylo_coral_DIV<-ps_filter(phylo_coral_DIV, lifestage != "Metamorphosed Polyp (183 hpf)")
phylo_coral_DIV<-ps_arrange(phylo_coral_DIV, lifestage)
Reorder lifestage.
levels(as.factor(sample_data(phylo_coral_DIV)$lifestage))
sample_data(phylo_coral_DIV)$lifestage = factor(sample_data(phylo_coral_DIV)$lifestage, levels = c("Egg (1 hpf)","Embryo (5 hpf)","Embryo (38 hpf)","Embryo (65 hpf)", "Larvae (93 hpf)", "Larvae (163 hpf)", "Larvae (231 hpf)", "Metamorphosed Polyp (231 hpf)", "Attached Recruit (183 hpf)", "Attached Recruit (231 hpf)"))
levels(as.factor(sample_data(phylo_coral_DIV)$lifestage))
Assign object to a shorter name.
GP1 = phylo_coral_DIV
GP1
Display sample sequencing depth.
We will attempt subsampling at the minimum count (10,551 sequences) and in steps decreasing from 10551, 8000, 6000, 4000, 2000, 1000, 500.
DIV level analysis