您对方差稳定矩阵的预期目的是什么?然后你打算使用矩阵来计算差异丰度吗?在本教程中,作者(Susan Holmes 和 Joey McMurdie)参考本教程使用改变的几何平均公式进行差异丰度测试,例如
#BiocManager::install("phyloseq")
#BiocManager::install("DESeq2")
library(phyloseq)
library(DESeq2)
#> Loading required package: S4Vectors
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#> Loading required package: IRanges
#> Loading required package: GenomicRanges
#> Loading required package: GenomeInfoDb
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
# Load example data
otufile = system.file("extdata", "GP_otu_table_rand_short.txt.gz", package = "phyloseq")
mapfile = system.file("extdata", "master_map.txt", package = "phyloseq")
trefile = system.file("extdata", "GP_tree_rand_short.newick.gz", package = "phyloseq")
qiimex = import_qiime(otufile, mapfile, trefile, showProgress = FALSE)
#> Processing map file...
#> Processing otu/tax file...
#> Reading file into memory prior to parsing...
#> Detecting first header line...
#> Header is on line 2
#> Converting input file to a table...
#> Defining OTU table...
#> Parsing taxonomy table...
#> Processing phylogenetic tree...
#> /Library/Frameworks/R.framework/Versions/4.1/Resources/library/phyloseq/extdata/GP_tree_rand_short.newick.gz ...
#> Warning in import_qiime(otufile, mapfile, trefile, showProgress = FALSE):
#> treefilename failed import. It will not be included.
qiimex
#> phyloseq-class experiment-level object
#> otu_table() OTU Table: [ 500 taxa and 26 samples ]
#> sample_data() Sample Data: [ 26 samples by 7 sample variables ]
#> tax_table() Taxonomy Table: [ 500 taxa by 7 taxonomic ranks ]
# Select samples
qiimex@sam_data$SampleType
#> [1] "Freshwater (creek)" "Freshwater (creek)" "Freshwater (creek)"
#> [4] "Soil" "Soil" "Mock"
#> [7] "Mock" "Mock" "Skin"
#> [10] "Freshwater" "Feces" "Skin"
#> [13] "Tongue" "Feces" "Skin"
#> [16] "Tongue" "Ocean" "Ocean"
#> [19] "Ocean" "Freshwater" "Soil"
#> [22] "Sediment (estuary)" "Sediment (estuary)" "Sediment (estuary)"
#> [25] "Feces" "Feces"
qiimex_subset <- subset_samples(qiimex, SampleType == "Soil" | SampleType == "Mock")
# Convert to DESeq2 object
DESeq2_Object <- phyloseq_to_deseq2(qiimex_subset, ~ SampleType)
#> converting counts to integer mode
#> Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
#> design formula are characters, converting to factors
# calculate geometric means prior to estimate size factors
gm_mean = function(x, na.rm=TRUE){
exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
}
geoMeans = apply(counts(DESeq2_Object), 1, gm_mean)
DESeq2_dds = estimateSizeFactors(DESeq2_Object, geoMeans = geoMeans)
DESeq2_dds = DESeq(DESeq2_dds, fitType="local")
#> using pre-existing size factors
#> estimating dispersions
#> gene-wise dispersion estimates
#> mean-dispersion relationship
#> final dispersion estimates
#> fitting model and testing
# Explore results
res = results(DESeq2_dds)
res = res[order(res$padj, na.last=NA), ]
alpha = 0.01
sigtab = res[(res$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(qiimex_subset)[rownames(sigtab), ], "matrix"))
head(sigtab)
#> baseMean log2FoldChange lfcSE stat pvalue padj
#> 184403 3316.1856 -11.604876 1.446460 -8.022948 1.032372e-15 1.827298e-13
#> 193343 820.0846 -10.147175 1.317754 -7.700354 1.356898e-14 1.200855e-12
#> 10113 316.3938 -9.216756 1.313276 -7.018143 2.248365e-12 1.326535e-10
#> 234421 238.0599 -11.167192 1.657220 -6.738510 1.600195e-11 7.080861e-10
#> 526081 259.6722 -9.346145 1.471139 -6.352999 2.111570e-10 7.474959e-09
#> 313166 1322.7565 -9.911298 1.593579 -6.219519 4.986800e-10 1.471106e-08
#> Kingdom Phylum Class Order
#> 184403 Bacteria Firmicutes Clostridia Clostridiales
#> 193343 Bacteria Firmicutes Clostridia Clostridiales
#> 10113 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales
#> 234421 Bacteria Firmicutes Clostridia Clostridiales
#> 526081 Bacteria Firmicutes Clostridia Clostridiales
#> 313166 Bacteria Bacteroidetes Bacteroidia Bacteroidales
#> Family Genus Species
#> 184403 Lachnospiraceae <NA> <NA>
#> 193343 Lachnospiraceae Roseburia Eubacteriumrectale
#> 10113 Enterobacteriaceae <NA> <NA>
#> 234421 Lachnospiraceae Ruminococcus <NA>
#> 526081 Lachnospiraceae Oribacterium <NA>
#> 313166 Bacteroidaceae Bacteroides <NA>
posigtab = sigtab[sigtab[, "log2FoldChange"] > 0, ]
posigtab = posigtab[, c("baseMean", "log2FoldChange", "lfcSE", "padj", "Phylum", "Class", "Family", "Genus")]
posigtab
#> baseMean log2FoldChange lfcSE padj Phylum
#> 303295 201.335216 6.506674 1.204792 1.305775e-06 Proteobacteria
#> 5552 196.834531 6.309543 1.178371 1.518862e-06 Proteobacteria
#> 236550 69.552022 8.770931 1.815173 1.840288e-05 Proteobacteria
#> 113626 844.163976 7.207704 1.503844 2.078799e-05 Acidobacteria
#> 97627 171.891263 6.169004 1.307597 2.812963e-05 Proteobacteria
#> 89763 52.875334 8.375284 1.795633 3.426524e-05 Actinobacteria
#> 306684 149.608143 5.964643 1.346820 9.322811e-05 Bacteroidetes
#> 218985 50.742401 8.315832 1.882803 9.334941e-05 Chloroflexi
#> 102382 335.108981 6.747414 1.555112 1.267530e-04 Proteobacteria
#> 206632 332.777997 7.486585 1.747926 1.553274e-04 Verrucomicrobia
#> 573607 178.082806 6.729167 1.614098 2.360541e-04 Acidobacteria
#> 593006 146.954357 7.392336 1.781983 2.469451e-04 Proteobacteria
#> 279590 151.624630 6.576054 1.637287 3.873396e-04 Acidobacteria
#> 89337 56.919233 6.692225 1.663102 3.873396e-04 Actinobacteria
#> 218035 43.876643 7.133246 1.793490 4.373432e-04 Actinobacteria
#> 368027 18.296878 6.844054 1.791678 7.384186e-04 Actinobacteria
#> 572242 62.103316 6.140545 1.616489 7.802034e-04 Planctomycetes
#> 588604 211.108277 6.589139 1.738856 7.862979e-04 Proteobacteria
#> 212596 183.109699 6.931159 1.846832 8.501324e-04 Acidobacteria
#> 237963 113.390704 7.549096 2.013483 8.501324e-04 Proteobacteria
#> 548077 23.508540 7.205060 1.921985 8.501324e-04 Actinobacteria
#> 222914 348.580255 7.393221 2.040394 1.319435e-03 Verrucomicrobia
#> 265094 519.413992 5.885380 1.636179 1.389556e-03 Actinobacteria
#> 341901 26.562097 7.381503 2.068411 1.512002e-03 Actinobacteria
#> 104310 11.372452 6.157420 1.746257 1.715256e-03 Actinobacteria
#> 561842 51.066519 5.459834 1.549686 1.715256e-03 Proteobacteria
#> 248299 16.577945 6.700539 1.906851 1.736686e-03 Actinobacteria
#> 166835 868.093822 6.189342 1.766318 1.762950e-03 Proteobacteria
#> 369734 32.758200 6.711075 1.919352 1.775080e-03 Proteobacteria
#> 295422 15.101303 6.565460 1.913842 2.221501e-03 Proteobacteria
#> 137099 25.876135 6.367122 1.867180 2.254420e-03 Verrucomicrobia
#> 254706 13.039735 6.358471 1.862435 2.254420e-03 Acidobacteria
#> 204462 137.941401 6.613294 1.963268 2.572456e-03 Acidobacteria
#> 160337 18.708823 5.893400 1.783191 3.172144e-03 Actinobacteria
#> 144065 23.284353 7.191156 2.184408 3.260211e-03 Proteobacteria
#> 113959 894.081156 6.136411 1.870146 3.325954e-03 Acidobacteria
#> 223948 10.185674 6.003017 1.833057 3.341251e-03 Acidobacteria
#> 160135 8.669323 5.764444 1.764057 3.366660e-03 Actinobacteria
#> 244840 8.505259 5.736708 1.759948 3.404914e-03 Planctomycetes
#> 240591 179.115203 5.662464 1.749499 3.628632e-03 Proteobacteria
#> 547752 22.387044 6.156040 1.918045 3.921972e-03 Proteobacteria
#> 163176 17.574738 5.803532 1.823396 4.230077e-03 Proteobacteria
#> 321885 24.312098 6.270669 2.013227 5.172611e-03 Planctomycetes
#> 329327 16.600143 5.717837 1.839840 5.212980e-03 Fibrobacteres
#> 262115 25.392319 7.316622 2.362343 5.319945e-03 SC4
#> 512616 15.661404 5.632695 1.854032 6.385192e-03 Proteobacteria
#> 261663 25.664154 4.953787 1.643615 6.812189e-03 Chloroflexi
#> 257151 16.884984 4.909367 1.658737 8.015407e-03 Bacteroidetes
#> 238929 25.265076 4.509092 1.544313 8.856346e-03 Verrucomicrobia
#> 322045 6.504135 5.355636 1.834044 8.856346e-03 Proteobacteria
#> Class Family Genus
#> 303295 Deltaproteobacteria Haliangiaceae <NA>
#> 5552 Alphaproteobacteria Caulobacteraceae Phenylobacterium
#> 236550 Betaproteobacteria Oxalobacteraceae Massilia
#> 113626 Chloracidobacteria <NA> <NA>
#> 97627 Betaproteobacteria Comamonadaceae Hylemonella
#> 89763 Actinobacteria <NA> <NA>
#> 306684 Sphingobacteria <NA> <NA>
#> 218985 SOGA31 <NA> <NA>
#> 102382 Betaproteobacteria Oxalobacteraceae Massilia
#> 206632 Spartobacteria Spartobacteriaceae MC18
#> 573607 Chloracidobacteria <NA> <NA>
#> 593006 Gammaproteobacteria Sinobacteraceae <NA>
#> 279590 Acidobacteria <NA> <NA>
#> 89337 Actinobacteria Pseudonocardiaceae Actinoalloteichus
#> 218035 Actinobacteria Microbacteriaceae <NA>
#> 368027 Actinobacteria <NA> <NA>
#> 572242 Planctomycea Gemmataceae <NA>
#> 588604 Alphaproteobacteria Rhodospirillaceae <NA>
#> 212596 Solibacteres Solibacteraceae CandidatusSolibacter
#> 237963 Deltaproteobacteria <NA> <NA>
#> 548077 Actinobacteria <NA> <NA>
#> 222914 Verrucomicrobiae Verrucomicrobiasubdivision3 <NA>
#> 265094 Actinobacteria Micromonosporaceae Micromonospora
#> 341901 Actinobacteria Nocardioidaceae <NA>
#> 104310 Actinobacteria Frankiaceae <NA>
#> 561842 Alphaproteobacteria <NA> <NA>
#> 248299 Actinobacteria <NA> <NA>
#> 166835 Betaproteobacteria Burkholderiaceae Burkholderia
#> 369734 Alphaproteobacteria Rhodospirillaceae <NA>
#> 295422 Betaproteobacteria Alcaligenaceae Sutterella
#> 137099 Verrucomicrobiae <NA> <NA>
#> 254706 Chloracidobacteria <NA> <NA>
#> 204462 Solibacteres Solibacteraceae CandidatusSolibacter
#> 160337 Actinobacteria Frankiaceae <NA>
#> 144065 Alphaproteobacteria Acetobacteraceae <NA>
#> 113959 <NA> <NA> <NA>
#> 223948 Chloracidobacteria <NA> <NA>
#> 160135 Actinobacteria Streptomycetaceae Streptomyces
#> 244840 Planctomycea Pirellulaceae Pirellula
#> 240591 Alphaproteobacteria Phyllobacteriaceae <NA>
#> 547752 Betaproteobacteria Burkholderiaceae Burkholderia
#> 163176 Betaproteobacteria Rhodocyclaceae Methyloversatilis
#> 321885 Planctomycea Gemmataceae <NA>
#> 329327 Fibrobacteres Fibrobacteraceae Fibrobacter
#> 262115 <NA> <NA> <NA>
#> 512616 Gammaproteobacteria Sinobacteraceae <NA>
#> 261663 Bljii12 Dolo_23 <NA>
#> 257151 Sphingobacteria <NA> <NA>
#> 238929 Verrucomicrobiae <NA> <NA>
#> 322045 Betaproteobacteria Oxalobacteraceae Massilia
# Plot the results
library(ggplot2)
theme_set(theme_bw())
sigtabgen = subset(sigtab, !is.na(Genus))
# Phylum order
x = tapply(sigtabgen$log2FoldChange, sigtabgen$Phylum, function(x) max(x))
x = sort(x, TRUE)
sigtabgen$Phylum = factor(as.character(sigtabgen$Phylum), levels=names(x))
# Genus order
x = tapply(sigtabgen$log2FoldChange, sigtabgen$Genus, function(x) max(x))
x = sort(x, TRUE)
sigtabgen$Genus = factor(as.character(sigtabgen$Genus), levels=names(x))
ggplot(sigtabgen, aes(y=Genus, x=log2FoldChange, color=Phylum)) +
geom_vline(xintercept = 0.0, color = "gray", size = 0.5) +
geom_point(size=6) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
由reprex 包于 2021-10-08 创建(v2.0.1)
该教程于 2021 年 5 月发布,因此它是最新的,它们基本上跳过了与您的问题相关的“中间步骤”,但如果您出于其他原因需要方差稳定矩阵,则可能的解决方案是使用Varistran应用 Anscombe 的方差稳定变换(它还提供了一些不错的绘图功能),例如
#devtools::install_github("MonashBioinformaticsPlatform/varistran")
#BiocManager::install("edgeR")
library(varistran)
#> Loading required package: grid
counts <- DESeq2_Object@assays@data$counts
design <- model.matrix(~ qiimex_subset@sam_data$SampleType)
y <- vst(counts, design=design)
#> Dispersion estimated as 0.5672283
# "y" contains the transformed counts
y
#> CC1 CL3 Even1 Even2 Even3 SV1
#> 338272 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927 0.7855066
#> 289855 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927 0.7855066
#> 287759 0.2967411 -0.3192927 -0.3192927 -0.3192927 1.5903105 4.0624033
#> 334839 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927
#> 144065 3.2103181 6.8071310 -0.3192927 -0.3192927 -0.3192927 0.7855066
#> 18643 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927
#> 218985 6.1359268 3.1727994 -0.3192927 -0.3192927 -0.3192927 8.2069561
#> 238997 -0.3192927 0.5037921 -0.3192927 -0.3192927 -0.3192927 -0.3192927
#> 245893 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927
#> 131765 -0.3192927 -0.3192927 -0.3192927 -0.3192927 1.5903105 0.7855066
#> 536982 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927
#> 351629 5.0182785 4.5937241 5.4609218 4.0571176 2.8729479 5.3692361
#> 321069 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927
#> 512157 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927 -0.3192927
#> ...
#> attr(,"lib.size")
#> CC1 CL3 Even1 Even2 Even3 SV1
#> 84395.682 57904.079 1560.595 4272.039 15659.007 38299.983
#> attr(,"true.lib.size")
#> CC1 CL3 Even1 Even2 Even3 SV1
#> 31125 37942 10184 7169 6597 34353
#> attr(,"lib.size.method")
#> [1] "TMM normalization"
#> attr(,"cpm")
#> [1] FALSE
#> attr(,"method")
#> [1] "anscombe.nb"
#> attr(,"dispersion")
#> [1] 0.5672283
plot_stability(y, counts, design=design)
#> Warning: Transformation introduced infinite values in continuous x-axis
#> Warning: Transformation introduced infinite values in continuous x-axis
plot_biplot(y)
plot_heatmap(y, n=50, cluster_samples = TRUE)
#> Registered S3 method overwritten by 'seriation':
#> method from
#> reorder.hclust vegan
由reprex 包于 2021-10-08 创建(v2.0.1)