html - 如何在 R 中执行脚本的循环和迭代？

Question

我正在运行一个 R 脚本来分析一些生物数据。下面提供了片段数据和脚本的示例。这个数据文件有很多列（但我想关注第 5 列 - 基因）。我有 100 多个这样的数据文件（将所有文件中的第 5 个基因列视为感兴趣的列）。目前我在 R 中分别运行每个文件，这个过程很乏味。我想同时为所有数据文件运行一个 R 脚本，并根据文件名将所有数据保存在不同的文件夹中。是否可以在脚本中一次循环或迭代和分析所有数据文件。请帮我解决这个问题。

谢谢，

图菲克

格式化问题和修订数据框

插入：要读取的文件的名称

M3.1_IPA.txt
M8.1_IPA.txt
M8.2_IPA.txt
M8.3_IPA.txt

1.加载基因集进行分析

Data_file <- read.delim(file = "./M3.1_IPA.txt", stringsAsFactors = FALSE, check.names = FALSE, row.names = NULL)

dput(head(Data_file, 5))
structure(list(row.names = c("M3.1_ALPP", "M3.1_ALS2CR14", "M3.1_ANKRD30B", 
"M3.1_ARL16", "M3.1_BCYRN1"), X = 1:5, Module = c("M3.1", "M3.1", 
"M3.1", "M3.1", "M3.1"), Title = c("Cell Cycle", "Cell Cycle", 
"Cell Cycle", "Cell Cycle", "Cell Cycle"), Gene = c("ALPP", "ALS2CR14", 
"ANKRD30B", "ARL16", "BCYRN1"), Probes = c("ILMN_1693789", "ILMN_1787314", 
"ILMN_1730678", "ILMN_2188119", "ILMN_1678757"), Module_gene = c("M3.1_ALPP", 
"M3.1_ALS2CR14", "M3.1_ANKRD30B", "M3.1_ARL16", "M3.1_BCYRN1"
)), row.names = c(NA, 5L), class = "data.frame")

Module_10_1 <- Data_file[,5]
Module_10_1_geneLists <- list(Module_10_1_geneListName=Module_10_1)

选择要使用的主题数据库（即 TSS 周围的生物体和距离）

data(motifAnnotations_hgnc)
motifRankings <- importRankings("./hg19-tss-centered-10kb-7species.mc9nr.feather")

计算富集

motifs_AUC <- calcAUC(Module_10_1_geneLists, motifRankings, nCores=1)
Type = "Enrichment"
Module = "M10_1"
auc <- getAUC(motifs_AUC)

##Export the plots##
pdf(paste0(Type, "_", Module, "_AUC_Histogram_Plot.pdf"), height = 5, width = 10)
    hist(auc, main="Module_10_1", xlab="AUC histogram",
      breaks=100, col="#ff000050", border="darkred")
    nes3 <- (3*sd(auc)) + mean(auc)
    abline(v=nes3, col="red")
dev.off()

选择重要的主题和/或注释到 TF

motifEnrichmentTable <- addMotifAnnotation(motifs_AUC, nesThreshold=3,

motifAnnot=motifAnnotations_hgnc)

##Export the Motif enrichment analysis results###
write.csv(motifEnrichmentTable, file ="./motifEnrichmentTable.csv", row.names = F, sep = ",")

确定每个基序具有最佳富集的基因

motifEnrichmentTable_wGenes_v1 <- addSignificantGenes(motifEnrichmentTable, 
             rankings=motifRankings, 
             geneSets=Module_10_1_geneLists)

##Export the Motif enrichment analysis results##
write.csv(motifEnrichmentTable_wGenes_v1, 
          file ="./motifEnrichmentTable_wGenes_v1.csv", 
          row.names = F, sep = ",")

绘制几个主题

geneSetName <- "Module_10_1_Genelist"
selectedMotifs <- c("cisbp__M6275", 

sample(motifEnrichmentTable$motif, 2))
par(mfrow=c(2,2))

Type_v1 <- "Few_motifs"
##Export the plots
pdf(paste0(Type_v1, "_", Module, "_Sig_Genes_Plot.pdf"), height = 5, width = 5)
getSignificantGenes(Module_10_1_geneLists, 
                    motifRankings,
                     signifRankingNames=selectedMotifs,
                     plotCurve=TRUE, maxRank=5000, genesFormat="none",
                     method="aprox")
dev.off()

RcisTarget的最终输出是一个data.table并导出为html

motifEnrichmentTable_wGenes_v1_wLogo <- addLogo(motifEnrichmentTable_wGenes_v1)
resultsSubset <- motifEnrichmentTable_wGenes_v1_wLogo[1:147,]
Type_v2 <- "Motifs_Table"

library(DT)
## export the data table to html file format###
dtable <- datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE], 
                escape = FALSE, # To show the logo
                filter="top", options=list(pageLength=100))
html_test <- "dtable.html"
saveWidget(dtable, html_test)

搭建网络并导出为html格式

signifMotifNames <- motifEnrichmentTable$motif[1:3]
incidenceMatrix <- getSignificantGenes(Module_10_1_geneLists, 
                                    motifRankings,
                                    signifRankingNames=signifMotifNames,
                                    plotCurve=TRUE, maxRank=5000, 
                                    genesFormat="incidMatrix",
                                    method="aprox")$incidMatrix

library(reshape2)

edges <- melt(incidenceMatrix)
edges <- edges[which(edges[,3]==1),1:2]
colnames(edges) <- c("from","to")

library(visNetwork)
motifs <- unique(as.character(edges[,1]))
genes <- unique(as.character(edges[,2]))
nodes <- data.frame(id=c(motifs, genes),   
                    label=c(motifs, genes),    
                    title=c(motifs, genes), # tooltip 
                    shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))),
                    color=c(rep("purple", length(motifs)), rep("skyblue", length(genes))))
Network <- visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE, 
                                     nodesIdSelection = TRUE)
##Export the network##
visSave(Network, file ="Network.html")

score 1 · Accepted Answer

我根据给出的示例文件名编写了它：

file_names <- c("M3.1_IPA.txt","M8.1_IPA.txt","M8.2_IPA.txt","M8.3_IPA.txt")

最简单的方法是迭代1:length(file_names)，并为每次迭代创建一组唯一的输出文件。根据您的问题，您希望将结果保存到不同的文件夹。这可以通过提取文件名（删除 .txt）然后使用该名称创建一个新目录来完成。然后，您可以将此迭代的所有输出保存到新目录

然后为下一次迭代创建一个新目录，因此您不会保存以前的结果。

for(i in 1:length(file_names)){
  Data_file <- read.csv(file = paste0("./",file_names[i]), stringsAsFactors = FALSE, check.names = FALSE)

  Module_10_1 <- Data_file[,1]
  Module_10_1_geneLists <- list(Module_10_1_geneListName=Module_10_1)
  
  data(motifAnnotations_hgnc)
  motifRankings <- importRankings("./hg19-tss-centered-10kb-7species.mc9nr.feather")
  
  
  motifs_AUC <- calcAUC(Module_10_1_geneLists, motifRankings, nCores=1)
  Type = "Enrichment"
  Module = "M10_1"
  auc <- getAUC(motifs_AUC)
  
  # Extract the file name without the extension, so the directory with the results
  # will correspond to this name:
  new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\\.txt","\\1")
  #Create the new directory:
  dir.create(new_directory_name)

  ##Export the plots##
  # Save to the new directory
  pdf(paste0(new_directory_name,"/",Type,"_", Module,"_AUC_Histogram_Plot.pdf"), height = 5, width = 10) 
  hist(auc, main="Module_10_1", xlab="AUC histogram",
       breaks=100, col="#ff000050", border="darkred")
  nes3 <- (3*sd(auc)) + mean(auc)
  abline(v=nes3, col="red")
  dev.off()

}

我省略了脚本的其余部分，因为保存motifEnrichmentTable_wGenes_v1.csv,_Sig_Genes_Plot.pdf和其他输出的解决方案与上面的相同_AUC_Histogram_Plot.pdf。您只需要将这些输出写入新目录，使用paste0(new_directory_name,"/",<insert-output-name>).

如果您有很多文件，file_names您可以在本地目录中搜索与正确模式匹配的文件，而不是手动创建向量。例如

all_files <- dir()
file_names <- grep(all_files,pattern = "*.txt$",value = TRUE)

将返回本地目录中的所有 .txt 文件。如果只需要以“M”开头的 .txt 文件，您可以进一步细化：

all_files <- dir()
file_names <- grep(all_files,pattern = "^M.*.txt$",value = TRUE)

如果不是本地目录中的所有 .txt 文件都是输入文件，您会这样做，并且您可能需要进一步细化正则表达式模式，直到您只捕获所需的 txt 文件。

对于 .html 文件，这有点困难，因为saveWidget()目前不允许您使用相对文件路径进行保存，但有一个使用.html 的解决方案withr::with_dir()。这应该适用于第 7 部分：

withr::with_dir(new_directory_name, saveWidget(dtable, file="dtable.html"))

这应该适用于第 8 部分：

withr::with_dir(new_directory_name, saveWidget(Network, file="Network.html"))

score 0 · Accepted Answer

7. RcisTarget最终输出的是一个data.table并导出为html

motifEnrichmentTable_wGenes_v1_wLogo <- addLogo(motifEnrichmentTable_wGenes_v1)
  resultsSubset <- motifEnrichmentTable_wGenes_v1_wLogo[1:147,]
  Type_v2 <- "Motifs_Table"
  
  # Extract the file name without the extension, so the directory with the results
  # will correspond to this name:
  new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\\.txt","\\1")
  #Create the new directory:
  dir.create(new_directory_name)
  
  library(DT)
  library(webshot)
  library(htmltools)
  library(xml2)
  ## export the data table to html file format###
  dtable <- datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE], 
                      escape = FALSE, # To show the logo
                      filter="top", options=list(pageLength=1000))
  html_test <- paste0(new_directory_name,"/","dtable.html")
  ##To save files in the pdf format###
  #saveWidget(dtable, html_test)
  #webshot(html_test, paste0(new_directory_name,"/", "motif.pdf"))

8.搭建网络并导出为html格式

# Extract the file name without the extension, so the directory with the results
  # will correspond to this name:
  new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\\.txt","\\1")
  #Create the new directory:
  dir.create(new_directory_name)
  
  signifMotifNames <- motifEnrichmentTable$motif[1:3]
  incidenceMatrix <- getSignificantGenes(Module_10_1_geneLists, 
                                         motifRankings,
                                         signifRankingNames=signifMotifNames,
                                         plotCurve=TRUE, maxRank=5000, 
                                         genesFormat="incidMatrix",
                                         method="aprox")$incidMatrix
  
  library(reshape2)
  
  edges <- melt(incidenceMatrix)
  edges <- edges[which(edges[,3]==1),1:2]
  colnames(edges) <- c("from","to")
  
  library(visNetwork)
  motifs <- unique(as.character(edges[,1]))
  genes <- unique(as.character(edges[,2]))
  nodes <- data.frame(id=c(motifs, genes),   
                      label=c(motifs, genes),    
                      title=c(motifs, genes), # tooltip 
                      shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))),
                      color=c(rep("purple", length(motifs)), rep("skyblue", length(genes))))
  Network <- visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE, 
                                                     nodesIdSelection = TRUE)
  ##Export the network##
  #visSave(Network, file ="Network.html")

  html_test_v1 <- paste0(new_directory_name,"/","Network.html")
  #saveWidget(Network, html_test_v1)
  #webshot(html_test_v1, paste0(new_directory_name,"/", "Network.pdf"))

html - 如何在 R 中执行脚本的循环和迭代？

2 回答 2

Related

Reference