NMDS analysis of transcriptomic data

packages

library(tidyr)
library(dplyr)
library(ggplot2)
library(vegan)
library(DESeq2)
library(phyloseq)

data

import data and format for deseq2:

exp <- read.csv("transcript_df.csv")
row.names(exp) <- exp$orf_id
exp <- exp[,-1]
exp<- exp[-c(1,2),]

meta <- read.csv("transcript_df_anno.csv")
meta$X <- paste("X", meta$X, sep="")
row.names(meta) <- meta$X
meta <- meta[,-1]
meta$b12 <- factor(meta$b12)

deseq2 variance stabilization

convert data to deseq2 experiment object:

dds<- DESeqDataSetFromMatrix(countData = exp,
                              colData = meta,
                              design = ~ iron)
dds

## class: DESeqDataSet 
## dim: 49068 36 
## metadata(1): version
## assays(1): counts
## rownames(49068): contig_0_29_403_- contig_10000_73_1464_+ ...
##   contig_99_126_578_- contig_9_13_630_+
## rowData names(0):
## colnames(36): X30_00_A X30_00_B ... X03_14_B X03_14_C
## colData names(3): condition iron b12

variance stabilize:

vsd <- vst(dds, blind=TRUE)

NMDS

convert variance stabilization results to a dataframe and a matrix:

vsd_df <- data.frame((assay(vsd)))
vsd_mat <- data.matrix(vsd_df)
dim(vsd_mat) #check data dimensions

## [1] 49068    36

49068 contigs, 36 samples

convert data matrix to phyloseq object:

VSD <- otu_table(vsd_mat, taxa_are_rows = TRUE)
META <- sample_data(meta)
PS <- phyloseq(VSD, META)

ordination of euclidean distances between samples:

ord <- ordinate(PS, "NMDS", "euclidean", k=4, trymax=50)

## Wisconsin double standardization
## Run 0 stress 0.0813784 
## Run 1 stress 0.08335308 
## Run 2 stress 0.08301179 
## Run 3 stress 0.08242736 
## Run 4 stress 0.0811948 
## ... New best solution
## ... Procrustes: rmse 0.03442481  max resid 0.09677127 
## Run 5 stress 0.08190442 
## Run 6 stress 0.08141065 
## ... Procrustes: rmse 0.07235335  max resid 0.2091962 
## Run 7 stress 0.08199026 
## Run 8 stress 0.08247531 
## Run 9 stress 0.08336137 
## Run 10 stress 0.08204965 
## Run 11 stress 0.08229842 
## Run 12 stress 0.08189722 
## Run 13 stress 0.08080644 
## ... New best solution
## ... Procrustes: rmse 0.05827346  max resid 0.1551595 
## Run 14 stress 0.08341273 
## Run 15 stress 0.08100476 
## ... Procrustes: rmse 0.07046188  max resid 0.1956304 
## Run 16 stress 0.083073 
## Run 17 stress 0.08326212 
## Run 18 stress 0.08469642 
## Run 19 stress 0.08118127 
## ... Procrustes: rmse 0.08038645  max resid 0.1950295 
## Run 20 stress 0.08252871 
## Run 21 stress 0.08089398 
## ... Procrustes: rmse 0.06324051  max resid 0.1813839 
## Run 22 stress 0.08198818 
## Run 23 stress 0.08175902 
## Run 24 stress 0.08272844 
## Run 25 stress 0.08208705 
## Run 26 stress 0.08144825 
## Run 27 stress 0.08213497 
## Run 28 stress 0.08301335 
## Run 29 stress 0.08130311 
## ... Procrustes: rmse 0.07339434  max resid 0.1987087 
## Run 30 stress 0.08052603 
## ... New best solution
## ... Procrustes: rmse 0.0197389  max resid 0.0459596 
## Run 31 stress 0.08254388 
## Run 32 stress 0.08280695 
## Run 33 stress 0.08375472 
## Run 34 stress 0.08155639 
## Run 35 stress 0.08196547 
## Run 36 stress 0.08176495 
## Run 37 stress 0.08153639 
## Run 38 stress 0.08097941 
## ... Procrustes: rmse 0.05886901  max resid 0.158476 
## Run 39 stress 0.08240893 
## Run 40 stress 0.08143792 
## Run 41 stress 0.0806406 
## ... Procrustes: rmse 0.01939933  max resid 0.05533644 
## Run 42 stress 0.08297224 
## Run 43 stress 0.08202115 
## Run 44 stress 0.08196255 
## Run 45 stress 0.08180099 
## Run 46 stress 0.08137868 
## Run 47 stress 0.08097331 
## ... Procrustes: rmse 0.0572615  max resid 0.1781588 
## Run 48 stress 0.08310565 
## Run 49 stress 0.08250652 
## Run 50 stress 0.08172108 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     49: no. of iterations >= maxit
##      1: stress ratio > sratmax

Plot NMDS ordination results:

p<-plot_ordination(PS, ord, type="samples")+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=b12, shape = iron), size =3)  + scale_shape_manual(values= c(21,24,22)) +scale_fill_manual(values=c("#333ED4", "#A1D5DE", "#A0D636", "#EEDE04", "#FFA52C", "#FD0100"))  +guides(fill = guide_legend(override.aes=list(shape=21)))

p

ggsave("NMDS_newcolors.pdf", width = 6, height = 4)

PERMANOVA

perform PERMANOVA on euclidean distances between samples:

set.seed(1)

vdist = vegdist(t(vsd_mat), method = "euclidean")

adonis2(vdist ~ iron+b12, data = meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vdist ~ iron + b12, data = meta)
##          Df SumOfSqs      R2       F Pr(>F)    
## iron      1   383745 0.24756 12.3834  0.001 ***
## b12       5   267688 0.17269  1.7277  0.008 ** 
## Residual 29   898669 0.57975                   
## Total    35  1550101 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ANOSIM

ano<- with(meta, anosim(vdist, condition))
summary(ano)

## 
## Call:
## anosim(x = vdist, grouping = condition) 
## Dissimilarity: euclidean 
## 
## ANOSIM statistic R: 0.6357 
##       Significance: 0.001 
## 
## Permutation: free
## Number of permutations: 999
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.113 0.137 0.164 0.205 
## 
## Dissimilarity ranks between and within classes:
##          0%    25%   50%    75% 100%   N
## Between   6 172.25 330.5 481.75  630 594
## hi_0    285 290.00 295.0 340.50  386   3
## hi_10    67  68.50  70.0  79.00   88   3
## hi_11   186 203.50 221.0 274.00  327   3
## hi_12    81 167.00 253.0 260.50  268   3
## hi_13   157 222.00 287.0 302.00  317   3
## hi_14   203 273.00 343.0 349.00  355   3
## lo_0     19  25.50  32.0  35.50   39   3
## lo_10    13  28.50  44.0  48.50   53   3
## lo_11     5   7.00   9.0   9.50   10   3
## lo_12     4   5.50   7.0   9.00   11   3
## lo_13    36  38.50  41.0  42.00   43   3
## lo_14     1   1.50   2.0   2.50    3   3