我遇到了一个非常奇怪的问题,即双精度矩阵变成字符串矩阵。我将代码缩减为以下问题:
num.samples <- nrow(expr.matrix)
num.genes <- ncol(expr.matrix)
gene.names <- colnames(expr.matrix)
# Define a function which returns a vector in order to...
execute.per.gene <- function(target.gene, ...) {
# Uninteresting code
x <- expr.matrix[,setdiff(1:num.genes, target.gene)]
y <- expr.matrix[,target.gene]
rf <- randomForest(x, y, mtry=10, ntree=100, importance=TRUE)
# Calculate importance measure
im <- importance(rf)[,"IncNodePurity"]
# Divide by number of samples
im / num.samples
}
# ... execute mclapply!
all.output <- mclapply(1:num.genes, execute.per.gene, mc.cores=mc.cores)
# Initialise matrix
weight.matrix <- matrix(0.0, nrow=num.genes, ncol=num.genes)
rownames(weight.matrix) <- gene.names
colnames(weight.matrix) <- gene.names
# And now I merge the results from 'all.output' into the weight.matrix
for (target.gene in 1:num.genes) {
# Get result
im <- all.output[[target.gene]]
# Find which rows to change for this column
cand.tf.idx <- match(names(im), gene.names)
# Merge results into output matrix
weight.matrix[cand.tf.idx, target.gene] <- im
}
# And suddenly, the matrix consists of a bunch of strings!
if (!is.numeric(weight.matrix[[1,1]])) { # dafuq
cat("\nEncountered strings! :/\n")
print(weight.matrix)
# recently added this for debugging purposes:
print(sapply(all.output, class))
browser()
}
输出是:
Encountered strings! :/
G1 G7 G9 G23 G26 G28 G29 G33 G44 G48 G50 G52 G55 G59 G63 G64 G69 G70
G1 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G7 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G9 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G23 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G26 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
...
而且我不知道为什么矩阵会变成一堆包含零的字符串。毕竟,在该execute.per.gene
方法中,我做的最后一件事是将向量除以一个整数,此时问题不会引发异常,因此此时im
必须仍然是双精度向量。
有人看到问题出在哪里吗?我究竟做错了什么?
更新,im
应该总是看起来像下面这样。如果im
已经由字符串组成,我im / num.samples
认为会失败。
> dput(im)
structure(c(3.86421872658217, 0.0600404651226161, 0.0729843866848986,
0.0556398483535666, 0.0488815568218319, 0.0526059937835038, 0.170688282373908,
0.129655447072086, 0.174050696716209, 0.244770969072866, 0.170282014024477,
0.100440545265572, 0.0634773436494396, 0.0696835665372604, 0.118303002740336,
0.0493612110879677, 0.103414668075989, 0.0149516634700066, 0.0377397612656266,
0.0462366818296757, 0.0534595079995701, 0.0418429987271517, 0.0521335103883387,
0.0454590053400778, 0.0620792864477719, 0.0528642019860386, 0.0440233200010488,
....
2.4293680818691, 0.0455845647048088, 0.0480473721971548, 0.0493418345253576,
0.0468391879447859, 1.53509517636789, 0.0639471582428624, 0.155340800410008,
0.0668494853135931, 0.0436381864919185, 1.09024170028797, 0.0649503734307499,
0.0490042073829033, 0.0304435411561372, 0.034892331733943, 0.0759421587532521,
0.0666974014679768, 0.913196971375135, 0.0550660353121449, 1.36191204205922,
3.63194611493454, 0.177078251458191, 0.17856008667256, 0.0499985787306069,
0.0465138307009715, 0.071656156183379, 0.0441178391009568, 0.239933902772204,
0.0719828575374175, 0.0654148345872996, 0.920668929212975, 0.0454979263784418,
2.92899170564573, 0.0208273505572265, 0.0397416566013167, 0.197310579354446,
0.0313568556466712), .Names = c("sample_2", "sample_3", "sample_4",
"sample_5", "sample_6", "sample_7", "sample_8", "sample_9", "sample_10",
"sample_11", "sample_12", "sample_13", "sample_14", "sample_15",
"sample_16", "sample_17", "sample_18", "sample_19", "sample_20",
"sample_21", "sample_22", "sample_23", "sample_24", "sample_25",
"sample_26", "sample_27", "sample_28", "sample_29", "sample_30",
"sample_31", "sample_32", "sample_33", "sample_34", "sample_35",
"sample_36", "sample_37", "sample_38", "sample_39", "sample_40",
"sample_41", "sample_42", "sample_43", "sample_44", "sample_45",
"sample_46", "sample_47", "sample_48", "sample_49", "sample_50",
"sample_51", "sample_52", "sample_53", "sample_54", "sample_55",
"sample_56", "sample_57", "sample_58", "sample_59", "sample_60",
"sample_61", "sample_62", "sample_63", "sample_64", "sample_65",
"sample_66", "sample_67", "sample_68", "sample_69", "sample_70",
"sample_71", "sample_72", "sample_73", "sample_74", "sample_75",
....
"sample_801", "sample_802", "sample_803", "sample_804", "sample_805"
))
以及有关gene.names
和的更多信息:num.samples
num.genes
> dput(num.samples)
1643L
> dput(num.genes)
805L
> dput(gene.names)
c("sample_1", "sample_2", "sample_3", "sample_4", "sample_5",
"sample_6", "sample_7", "sample_8", "sample_9", "sample_10",
"sample_11", "sample_12", "sample_13", "sample_14", "sample_15",
"sample_16", "sample_17", "sample_18", "sample_19", "sample_20",
"sample_21", "sample_22", "sample_23", "sample_24", "sample_25",
"sample_26", "sample_27", "sample_28", "sample_29", "sample_30",
"sample_31", "sample_32", "sample_33", "sample_34", "sample_35",
"sample_36", "sample_37", "sample_38", "sample_39", "sample_40",
...
"sample_741", "sample_742", "sample_743", "sample_744", "sample_745",
"sample_746", "sample_747", "sample_748", "sample_749", "sample_750",
"sample_751", "sample_752", "sample_753", "sample_754", "sample_755",
"sample_756", "sample_757", "sample_758", "sample_759", "sample_760",
"sample_761", "sample_762", "sample_763", "sample_764", "sample_765",
"sample_766", "sample_767", "sample_768", "sample_769", "sample_770",
"sample_771", "sample_772", "sample_773", "sample_774", "sample_775",
"sample_776", "sample_777", "sample_778", "sample_779", "sample_780",
"sample_781", "sample_782", "sample_783", "sample_784", "sample_785",
"sample_786", "sample_787", "sample_788", "sample_789", "sample_790",
"sample_791", "sample_792", "sample_793", "sample_794", "sample_795",
"sample_796", "sample_797", "sample_798", "sample_799", "sample_800",
"sample_801", "sample_802", "sample_803", "sample_804", "sample_805"
)