我有一个看起来像这样的数据框:
> dataf
gene_name log.pval log.fc
1 TNIP3 0.631094308480128 1.1506542453426
2 NP_808217.1 0.772551410812959 -0.808513121902601
3 Q6ZQS7_HUMAN 0.120675567248996 1.20907596997608
4 Q8TA88_HUMAN 1.44494939509263 2.06552619626619
5 THAP5 0.766534371246673 1.13887517622511
6 BHLHB5 0.596510549476608 -0.890552737707591
7 XP_372337.2 0.495557665215951 -0.896828380212937
8 Q7Z2R7_HUMAN 0.448309045489994 -1.81608512967947
9 IFITM2 0.453896048090579 1.63881366592662
10 - 1.28765417650748 -0.95944121604072
我想要做的是使用log.fc
列和 y 轴使用log.pval
列绘制 x 轴图。
但是为什么这个命令失败了?
plot(dataf$log.fc,dataf$log.pval,xlim=c(-2,2),ylim=c(0,5));
它给了我以下错误消息:
Error in if (equidist) seq.int(1/(2 * ny), 1 - 1/(2 * ny), by = 1/ny) else (yat[-1L] + :
missing value where TRUE/FALSE needed
Calls: plot -> plot.factor -> spineplot -> spineplot.default
In addition: Warning messages:
1: In spineplot.default(x, y, ...) :
x axis is on a cumulative probability scale, 'xlim' must be in [0,1]
2: In spineplot.default(x, y, ...) :
y axis is on a cumulative probability scale, 'ylim' must be in [0,1]
Execution halted
更新:输入数据和完整代码。
输入文件mydata.csv
:
Name|ID_REF|xN|xK|xD|yN|yK|yD
NP_542387.1|H200009573|116.344280521|157.34028943|165.521765927|129.382514146|146.734212992|111.564081171
TRIM65|H200014442|139.28243473|142.8535596|183.543914333|151.18230485|126.012002369|117.926240275
Q96JT4_HUMAN|H200011360|6.30624488407|6.23249632016|8.84048629421|3.67264474301|1.78051470938|3.00185438056
MTCH1|H200017563|962.796774907|984.466886282|1225.05018099|1006.35251622|959.451292287|1004.59648311
RPL35|H200008624|13462.3246704|16392.4312316|16124.489617|15544.2347278|16130.3829312|12319.5940351
代码:
data<-read.delim("~/Desktop/mydata.csv",sep="|",na.strings="",header=TRUE,blank.lines.skip=TRUE,fill=FALSE)
data<- data[complete.cases(data),]
gname <- as.matrix(data$Name);
idref <- as.matrix(data$ID_REF);
xN <- data$xN;
xK <- data$xK;
xD <- data$xD;
yN <- data$yN;
yK <- data$yK;
yD <- data$yD;
xK_yK <- as.matrix(cbind(xK,yK));
xD_yD <- as.matrix(cbind(xD,yD));
xN_yN <- as.matrix(cbind(xN,yN));
#--------------------------------------------------
# Functions
#--------------------------------------------------
foldchange_geom_avg <- function(r,g) {
# r and g is vector with value >=1
# Essentially the same with M value
# >0 if r is upregulated
# <0 if r is downregulated
return (sum(log2(r/g))/length(r));
}
ranked.ttest <- function(gn,v1,v2,test.type="t") {
# By default t.test assume unequal variance
# and applies Welsh modification
# These samples are assumed also to be independent
# and the distributions are strictly normal
# Mann-Whitney (Wilcoxon) assume no-normality
# and with arbitrary sample
out <- c();
for (i in 1:nrow(v1)) {
test <- t.test(v1[i,],v2[i,],paired=FALSE)
if (test.type == "mw") {
test <- wilcox.test(v1[i,],v2[i,],alternative="two.sided",paired=FALSE)
}
pval <- test$p.value;
log.pval <- -log10(pval);
gene_name <- gn[i];
log.fc <- foldchange_geom_avg(v1[i,],v2[i,]);
abs.fc <- abs(log.fc)
if (is.numeric(pval) ){
out <- rbind(out,cbind(gene_name,log.pval,pval,log.fc));
}
}
#out <- out[with(out,order(pvalue,genename))]
return (data.frame(out));
}
test <- ranked.ttest(gname,xK_yK,xN_yN)
plot(test$log.fc,test$log.pval,xlim=c(-2,2),ylim=c(0,5));
这是会话信息:
> sessionInfo()
R version 2.15.2 (2012-10-26)
Platform: i386-apple-darwin9.8.0/i386 (32-bit)
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
情节是这样的: