让我们用一个例子来获取 DESeq2 结果:
library(DESeq2)
library(zebrafishRNASeq)
data(zfGenes)
head(zfGenes)
Ctl1 Ctl3 Ctl5 Trt9 Trt11 Trt13
ENSDARG00000000001 304 129 339 102 16 617
ENSDARG00000000002 605 637 406 82 230 1245
ENSDARG00000000018 391 235 217 554 451 565
ENSDARG00000000019 2979 4729 7002 7309 9395 3349
dds = DESeqDataSetFromMatrix(zfGenes[rowMeans(zfGenes)>30,],
data.frame(grp=gsub("[0-9]*","",colnames(zfGenes))),~grp)
dds = DESeq(dds)
res = data.frame(results(dds))
我们需要得到一个注解,可以是一个床文件,你使用与import()
rtracklayer 相同的函数:
library(rtracklayer)
gtf = import("ftp://ftp.ensembl.org/pub/release-100/gtf/danio_rerio/Danio_rerio.GRCz11.100.gtf.gz")
genes = gtf[gtf$type=="gene"]
head(genes)
GRanges object with 6 ranges and 20 metadata columns:
seqnames ranges strand | source type score phase
<Rle> <IRanges> <Rle> | <factor> <factor> <numeric> <integer>
[1] 4 17308-18211 - | ensembl gene <NA> <NA>
[2] 4 31259-45642 + | ensembl gene <NA> <NA>
[3] 4 38344-45396 + | ensembl gene <NA> <NA>
[4] 4 48519-53370 - | ensembl gene <NA> <NA>
[5] 4 57034-64703 - | ensembl gene <NA> <NA>
[6] 4 69619-72011 + | ensembl gene <NA> <NA>
您需要将 deseq2 结果的行名与列匹配,在本例中为gene_id,如果您的表是符号,则将其与相应的匹配:
annotation = as.data.frame(genes)[match(rownames(res),genes$gene_id),]
head(as.data.frame(genes)[match(rownames(res),genes$gene_id),])
seqnames start end width strand source type score phase
12565 9 34112067 34121839 9773 - ensembl_havana gene NA NA
12564 9 34089156 34113209 24054 + ensembl_havana gene NA NA
484 4 15081385 15103696 22312 - ensembl_havana gene NA NA
480 4 15011341 15059876 48536 + ensembl_havana gene NA NA
21778 12 33484458 33537126 52669 + ensembl_havana gene NA NA
这里有不止一个染色体,所以我们只做 25 号染色体:
res = cbind(res,annotation[,1:5])
res = res[which(res$seqnames == 25),]
plot(res$start,res$log2FoldChange,xaxt="n",xlab="Pos on chr25",ylab="Log2FC")
axis(side=1,5e6*1:8,paste(5 * 1:8,"Mb"))