library(tidyr)
library(dplyr)
library(ggplot2)
library(vegan)
library(DESeq2)
library(phyloseq)
import data and format for deseq2:
exp <- read.csv("transcript_df.csv")
row.names(exp) <- exp$orf_id
exp <- exp[,-1]
exp<- exp[-c(1,2),]
meta <- read.csv("transcript_df_anno.csv")
meta$X <- paste("X", meta$X, sep="")
row.names(meta) <- meta$X
meta <- meta[,-1]
meta$b12 <- factor(meta$b12)
convert data to deseq2 experiment object:
dds<- DESeqDataSetFromMatrix(countData = exp,
colData = meta,
design = ~ iron)
dds
## class: DESeqDataSet
## dim: 49068 36
## metadata(1): version
## assays(1): counts
## rownames(49068): contig_0_29_403_- contig_10000_73_1464_+ ...
## contig_99_126_578_- contig_9_13_630_+
## rowData names(0):
## colnames(36): X30_00_A X30_00_B ... X03_14_B X03_14_C
## colData names(3): condition iron b12
variance stabilize:
vsd <- vst(dds, blind=TRUE)
convert variance stabilization results to a dataframe and a matrix:
vsd_df <- data.frame((assay(vsd)))
vsd_mat <- data.matrix(vsd_df)
dim(vsd_mat) #check data dimensions
## [1] 49068 36
49068 contigs, 36 samples
convert data matrix to phyloseq object:
VSD <- otu_table(vsd_mat, taxa_are_rows = TRUE)
META <- sample_data(meta)
PS <- phyloseq(VSD, META)
ordination of euclidean distances between samples:
ord <- ordinate(PS, "NMDS", "euclidean", k=4, trymax=50)
## Wisconsin double standardization
## Run 0 stress 0.0813784
## Run 1 stress 0.08335308
## Run 2 stress 0.08301179
## Run 3 stress 0.08242736
## Run 4 stress 0.0811948
## ... New best solution
## ... Procrustes: rmse 0.03442481 max resid 0.09677127
## Run 5 stress 0.08190442
## Run 6 stress 0.08141065
## ... Procrustes: rmse 0.07235335 max resid 0.2091962
## Run 7 stress 0.08199026
## Run 8 stress 0.08247531
## Run 9 stress 0.08336137
## Run 10 stress 0.08204965
## Run 11 stress 0.08229842
## Run 12 stress 0.08189722
## Run 13 stress 0.08080644
## ... New best solution
## ... Procrustes: rmse 0.05827346 max resid 0.1551595
## Run 14 stress 0.08341273
## Run 15 stress 0.08100476
## ... Procrustes: rmse 0.07046188 max resid 0.1956304
## Run 16 stress 0.083073
## Run 17 stress 0.08326212
## Run 18 stress 0.08469642
## Run 19 stress 0.08118127
## ... Procrustes: rmse 0.08038645 max resid 0.1950295
## Run 20 stress 0.08252871
## Run 21 stress 0.08089398
## ... Procrustes: rmse 0.06324051 max resid 0.1813839
## Run 22 stress 0.08198818
## Run 23 stress 0.08175902
## Run 24 stress 0.08272844
## Run 25 stress 0.08208705
## Run 26 stress 0.08144825
## Run 27 stress 0.08213497
## Run 28 stress 0.08301335
## Run 29 stress 0.08130311
## ... Procrustes: rmse 0.07339434 max resid 0.1987087
## Run 30 stress 0.08052603
## ... New best solution
## ... Procrustes: rmse 0.0197389 max resid 0.0459596
## Run 31 stress 0.08254388
## Run 32 stress 0.08280695
## Run 33 stress 0.08375472
## Run 34 stress 0.08155639
## Run 35 stress 0.08196547
## Run 36 stress 0.08176495
## Run 37 stress 0.08153639
## Run 38 stress 0.08097941
## ... Procrustes: rmse 0.05886901 max resid 0.158476
## Run 39 stress 0.08240893
## Run 40 stress 0.08143792
## Run 41 stress 0.0806406
## ... Procrustes: rmse 0.01939933 max resid 0.05533644
## Run 42 stress 0.08297224
## Run 43 stress 0.08202115
## Run 44 stress 0.08196255
## Run 45 stress 0.08180099
## Run 46 stress 0.08137868
## Run 47 stress 0.08097331
## ... Procrustes: rmse 0.0572615 max resid 0.1781588
## Run 48 stress 0.08310565
## Run 49 stress 0.08250652
## Run 50 stress 0.08172108
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 49: no. of iterations >= maxit
## 1: stress ratio > sratmax
Plot NMDS ordination results:
p<-plot_ordination(PS, ord, type="samples")+theme_bw() + theme(text = element_text(size=14)) + geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") + geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=b12, shape = iron), size =3) + scale_shape_manual(values= c(21,24,22)) +scale_fill_manual(values=c("#333ED4", "#A1D5DE", "#A0D636", "#EEDE04", "#FFA52C", "#FD0100")) +guides(fill = guide_legend(override.aes=list(shape=21)))
p
ggsave("NMDS_newcolors.pdf", width = 6, height = 4)
perform PERMANOVA on euclidean distances between samples:
set.seed(1)
vdist = vegdist(t(vsd_mat), method = "euclidean")
adonis2(vdist ~ iron+b12, data = meta)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = vdist ~ iron + b12, data = meta)
## Df SumOfSqs R2 F Pr(>F)
## iron 1 383745 0.24756 12.3834 0.001 ***
## b12 5 267688 0.17269 1.7277 0.008 **
## Residual 29 898669 0.57975
## Total 35 1550101 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ano<- with(meta, anosim(vdist, condition))
summary(ano)
##
## Call:
## anosim(x = vdist, grouping = condition)
## Dissimilarity: euclidean
##
## ANOSIM statistic R: 0.6357
## Significance: 0.001
##
## Permutation: free
## Number of permutations: 999
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.113 0.137 0.164 0.205
##
## Dissimilarity ranks between and within classes:
## 0% 25% 50% 75% 100% N
## Between 6 172.25 330.5 481.75 630 594
## hi_0 285 290.00 295.0 340.50 386 3
## hi_10 67 68.50 70.0 79.00 88 3
## hi_11 186 203.50 221.0 274.00 327 3
## hi_12 81 167.00 253.0 260.50 268 3
## hi_13 157 222.00 287.0 302.00 317 3
## hi_14 203 273.00 343.0 349.00 355 3
## lo_0 19 25.50 32.0 35.50 39 3
## lo_10 13 28.50 44.0 48.50 53 3
## lo_11 5 7.00 9.0 9.50 10 3
## lo_12 4 5.50 7.0 9.00 11 3
## lo_13 36 38.50 41.0 42.00 43 3
## lo_14 1 1.50 2.0 2.50 3 3