1

我有一个txf_df子集gene.list$entrez,然后找到唯一数量的成绩单列表。然后txf_df转换为txf_grange.

现在,我想创建一个包含 15 个unique基因的 for 循环,在每次迭代时,txf_grange仅按特定基因对对象进行子集化。

代码:

# Subset by the Entrez IDs
txf_df <- txf_df %>% filter(geneName %in% gene.list$entrez)

# Find the number of common transcripts
unique <- unique(txf_df$geneName)
length(unique)

# Recast this dataframe back to a GRanges object
txf_grange <- makeGRangesFromDataFrame(txf_df, keep.extra.columns=T)

# For each of the 15 genes, subset the Granges objects by only the gene
for (i in gene.list["entrez"]) {
  for (j in txf_grange$geneName) {
    if (i==j) {
      assign(paste0("gene.", i), 1:j) <- txf_grange[j,]
    }
  }
}

数据:

> dput(head(txf_df))
structure(list(seqnames = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "16", class = "factor"), 
    start = c(12058964L, 12059311L, 12059311L, 12060052L, 12060198L, 
    12060198L), end = c(12059311L, 12060052L, 12061427L, 12060198L, 
    12060877L, 12061427L), width = c(348L, 742L, 2117L, 147L, 
    680L, 1230L), strand = structure(c(1L, 1L, 1L, 1L, 1L, 1L
    ), .Label = c("+", "-", "*"), class = "factor"), type = structure(c(3L, 
    1L, 1L, 2L, 1L, 1L), .Label = c("J", "I", "F", "L", "U"), class = "factor"), 
    txName = structure(list(c("uc002dbv.3", "uc010buy.3", "uc010buz.3"
    ), c("uc002dbv.3", "uc010buy.3"), "uc010buz.3", c("uc002dbv.3", 
    "uc010buy.3"), "uc010buy.3", "uc002dbv.3"), class = "AsIs"), 
    geneName = structure(list("608", "608", "608", "608", "608", 
        "608"), class = "AsIs")), row.names = c(NA, 6L), class = "data.frame")

> dput(head(gene.list))
structure(list(Name = c("AQP8", "CLCA1", "GUCA2B", "ZG16", "CA4", 
"CA1"), Pvalue = c(3.24077275512836e-22, 2.57708986670727e-21, 
5.53491656902485e-21, 4.14482213350182e-20, 2.7795892896524e-19, 
1.23890644641685e-18), adjPvalue = c(8.3845272720681e-18, 6.66744690314504e-17, 
1.43199361473811e-16, 1.07234838237959e-15, 7.19135341018869e-15, 
3.20529875816967e-14), logFC = c(-3.73323340223377, -2.96422555675244, 
-3.34493724166712, -2.87787132076412, -2.87670608798164, -3.15664667432159
), entrez = c(AQP8 = "343", CLCA1 = "1179", GUCA2B = "2981", 
ZG16 = "653808", CA4 = "762", CA1 = "759")), row.names = c(NA, 
6L), class = "data.frame")
4

0 回答 0