0

我有一个 for 循环:它依次读取列表中的每个样本文件,为该样本创建一个矩阵,然后把这个矩阵存入一个包含所有样本矩阵的大列表中。

这是我到目前为止所做的:

# load in data ------------------------------------------------------------------

# Download the supplementary files of the GEO series into ./GSE124395/.
filePaths <- getGEOSuppFiles("GSE124395")

# `pattern` is a regular expression, not a shell glob: "*.tar" would also match
# names that merely contain "tar", so anchor on the literal file extension.
tarF <- list.files(path = "./GSE124395/", pattern = "\\.tar$", full.names = TRUE)
if (length(tarF) == 0) {
  stop("No .tar archive found in ./GSE124395/", call. = FALSE)
}

# untar() is documented for a single archive path, so unpack one at a time.
for (tar_path in tarF) {
  untar(tar_path, exdir = "./GSE124395/")
}

gzipF <- list.files(path = "./GSE124395/", pattern = "\\.gz$", full.names = TRUE)

# Decompress every .gz file; called only for its side effect, so the
# per-file return values are discarded instead of being printed.
invisible(lapply(gzipF, gunzip))

# running test loop -------------------------------------------------------------

testlist <- c("./GSE124395//GSM3531672_P301_3_CRYOMIXED11.coutt.csv",
              "./GSE124395//GSM3531673_P301_4_CRYOMIXED12.coutt.csv",
              "./GSE124395//GSM3531674_P301_5_HEP1_1_5.coutt.csv")

# The barcode table does not depend on the sample, so it is read once here
# instead of once per iteration. The first column of the reshaped table
# supplies the column labels for every count matrix.
colname_test <- read.table(file = "./GSE124395//GSE124395_celseq_barcodes.192.txt",
                           header = FALSE, row.names = 1)
colname_test <- data.frame(colname_test[, -1], col = colname_test[, 1])
colname_test <- as.matrix(colname_test)
barcode_labels <- colname_test[, 1]

# One slot per sample file; seq_along() is safe even if testlist is empty.
LoopList_test <- vector("list", length(testlist))

for (i in seq_along(testlist)) {
  # Read the sample table, move its first column into the row names and
  # convert the remaining columns to a matrix.
  matrix_test <- read.delim(file = testlist[i])
  matrix_test <- data.frame(matrix_test[, -1], row.names = matrix_test[, 1])
  matrix_test <- as.matrix(matrix_test)

  colnames(matrix_test) <- barcode_labels

  LoopList_test[[i]] <- matrix_test
}

# Name each element after its source file so a single sample's matrix can be
# pulled out directly, e.g.
# LoopList_test[["GSM3531672_P301_3_CRYOMIXED11.coutt.csv"]].
names(LoopList_test) <- basename(testlist)

这是输出: 一个大列表中的部分输出

我希望循环把每次迭代的结果存成一个独立的矩阵,也就是说,最终得到的是多个单独的矩阵,而不是一个包含所有矩阵的巨大列表(希望我表达清楚了)。我想这可能需要把这个大列表拆分成子列表,或者把循环的结果存入矩阵/数组/向量而不是列表,或者在循环中以某种方式把每次迭代的结果赋给各自的变量。我不知道该怎么做。

谢谢阅读!

更新:

所以这一切的重点是创建矩阵,然后将它们组合成一个矩阵。然后将这个矩阵变成一个 Seurat 对象,然后我可以对其进行聚类。

这就是我到目前为止所做的:基本上,我对数据集中的每个组分别写了一个循环,在循环中添加所需的信息,最后得到一个列表。后来我发现,我要用的函数本来就接受列表作为输入,这正合我意。这是我目前确定使用的代码:

mylist <- list.files(path = "./GSE124395/", pattern = "\\.csv$", full.names = TRUE)

# Positions in mylist that this loop processes as patient 301 samples.
# Fail early with a clear message instead of trying to read an NA path.
p301_idx <- 1:30
stopifnot(length(mylist) >= max(p301_idx))

# The barcode table does not depend on the sample, so it is read once here
# instead of once per iteration. The first column of the reshaped table
# supplies the column labels for every count matrix.
colname_input <- read.table(file = "./GSE124395//GSE124395_celseq_barcodes.192.txt",
                            header = FALSE, row.names = 1)
colname_input <- data.frame(colname_input[, -1], col = colname_input[, 1])
colname_input <- as.matrix(colname_input)
barcode_labels <- colname_input[, 1]

# Filled by position; later loops append further samples to the same list.
LoopList <- list()

for (i in p301_idx) {
  # Read the sample table, move its first column into the row names and
  # convert the remaining columns to a matrix.
  matrix_input <- read.delim(file = mylist[i])
  matrix_input <- data.frame(matrix_input[, -1], row.names = matrix_input[, 1])
  matrix_input <- as.matrix(matrix_input)

  # Label the columns with the barcodes, tagged with the treatment name.
  colnames(matrix_input) <- barcode_labels
  colnames(matrix_input) <- paste(colnames(matrix_input), "Colorectal_Metastasis", sep = "_")
  P301_pdat <- data.frame("samples" = colnames(matrix_input), "treatment" = "Colorectal_Metastasis")

  sobj <- CreateSeuratObject(counts = matrix_input, min.cells = 0, min.features = 1,
                             project = "Patient301_Colorectal_Metastasis")

  # Store the treatment label in the object's metadata; the downstream
  # DimPlot(split.by = "treatment") call needs this column to exist.
  sobj$treatment <- "Colorectal_Metastasis"

  LoopList[[i]] <- sobj
}



# P304 loop -------------------------------------------------------------------------

# Positions in mylist that this loop processes as patient 304 samples.
# Fail early with a clear message instead of trying to read an NA path.
p304_idx <- 31:56
stopifnot(length(mylist) >= max(p304_idx))

# The barcode table does not depend on the sample, so it is read once here
# instead of once per iteration. The first column of the reshaped table
# supplies the column labels for every count matrix.
colname_input <- read.table(file = "./GSE124395//GSE124395_celseq_barcodes.192.txt",
                            header = FALSE, row.names = 1)
colname_input <- data.frame(colname_input[, -1], col = colname_input[, 1])
colname_input <- as.matrix(colname_input)
barcode_labels <- colname_input[, 1]

for (i in p304_idx) {
  # Read the sample table, move its first column into the row names and
  # convert the remaining columns to a matrix.
  matrix_input <- read.delim(file = mylist[i])
  matrix_input <- data.frame(matrix_input[, -1], row.names = matrix_input[, 1])
  matrix_input <- as.matrix(matrix_input)

  # Label the columns with the barcodes, tagged with the treatment name.
  colnames(matrix_input) <- barcode_labels
  colnames(matrix_input) <- paste(colnames(matrix_input), "Colorectal_Metastasis", sep = "_")
  P304_pdat <- data.frame("samples" = colnames(matrix_input), "treatment" = "Colorectal_Metastasis")

  sobj <- CreateSeuratObject(counts = matrix_input, min.cells = 0, min.features = 1,
                             project = "Patient304_Colorectal_Metastasis")

  # Store the treatment label in the object's metadata; the downstream
  # DimPlot(split.by = "treatment") call needs this column to exist.
  sobj$treatment <- "Colorectal_Metastasis"

  # Appends to the LoopList created before this loop.
  LoopList[[i]] <- sobj
}

其余各组的循环依此类推。然后,按照 https://satijalab.org/seurat/articles/integration_large_datasets.html 中的教程进行整合:

# LoopList already holds one Seurat object per sample. SplitObject() splits a
# single Seurat object into a list and cannot be applied to a list, so the
# list is used directly.
sobj.list <- LoopList

# Per-sample preprocessing: normalise and pick variable features.
joined <- lapply(X = sobj.list, FUN = function(x) {
  x <- NormalizeData(x, verbose = FALSE)
  FindVariableFeatures(x, verbose = FALSE)
})

# Scale and run PCA on a shared feature set, as reciprocal PCA integration
# needs a PCA computed on the same features in every object.
features <- SelectIntegrationFeatures(object.list = joined)
joined <- lapply(X = joined, FUN = function(x) {
  x <- ScaleData(x, features = features, verbose = FALSE)
  RunPCA(x, features = features, verbose = FALSE)
})

# NOTE(review): dims = 1:50 assumes every object has enough cells for 50
# principal components -- confirm for small samples.
anchors <- FindIntegrationAnchors(object.list = joined, reduction = "rpca",
                                  dims = 1:50)
joined.integrated <- IntegrateData(anchorset = anchors, dims = 1:50)

joined.integrated <- ScaleData(joined.integrated, verbose = FALSE)
joined.integrated <- RunPCA(joined.integrated, verbose = FALSE)
joined.integrated <- RunUMAP(joined.integrated, dims = 1:50)

DimPlot(joined.integrated, group.by = "orig.ident")

# split.by needs a "treatment" metadata column; skip the plot with a clear
# message rather than failing when the objects were built without it.
if ("treatment" %in% colnames(joined.integrated[[]])) {
  DimPlot(joined.integrated, reduction = "umap", split.by = "treatment")
} else {
  message("No 'treatment' metadata column found; skipping the split plot.")
}

我不确定这样做是否真的有效,但我想更新一下这个问题,记录我到目前为止学到的东西!我得到的教训是:先找找有没有直接接受列表作为输入的函数,嘿嘿。

4

0 回答 0