我想计算给定染色体（Chr）中各标记（Name）之间的距离。对象 dist1.alldown（下游距离）和 dist1.allup（上游距离）正是我想要的结果。但是，下面的脚本计算效率很低（我的真实数据可能包含一百万个标记，这个循环非常耗时）。
# Example marker table: one row per marker, giving its name, chromosome and
# position on that chromosome.
df <- 'Name Chr Position
GGaluGA001820 chr1 34388
Gga_rs16686671 chr1 67781
GGaluGA001841 chr1 80477
Gga_rs15995401 chr1 111556
Gga_rs15995393 chr1 112481
GGaluGA001890 chr1 149690
GGaluGA001902 chr1 176450
Gga_rs14688751 chr1 185573
GGaluGA001921 chr1 202425
GGaluGA001945 chr1 235155'
df <- read.table(text = df, header = TRUE)
probes <- df
probes.split <- split(probes, probes$Chr)

# Distances between position-adjacent markers of a single chromosome.
#
# Arguments:
#   probescx   Data frame holding the markers of one chromosome. Columns 1:2
#              are used as labels (Name, Chr), column 3 supplies the values
#              that are subtracted, and `Position` is the sort key.
#   direction  "up":   distance to the previous marker (current - previous);
#                      the first marker has no predecessor and is omitted.
#              "down": distance to the next marker (current - next, hence
#                      negative or zero); the last marker has no successor
#                      and is kept with an NA distance.
#
# Returns a data frame with the two label columns plus `dist1`, in ascending
# position order and carrying the row names of the source rows, or NULL when
# the chromosome contributes no rows.
neighbour_dist <- function(probescx, direction = c("up", "down")) {
  direction <- match.arg(direction)
  probescx <- probescx[order(probescx$Position), , drop = FALSE]
  n <- nrow(probescx)
  # diff() yields next - current for every adjacent pair in a single
  # vectorised call, replacing the row-by-row subtraction.
  step <- diff(probescx[[3]])
  if (direction == "up") {
    if (n < 2L) {
      return(NULL)
    }
    keep <- seq_len(n)[-1L]
    dist1 <- step
  } else {
    if (n < 1L) {
      return(NULL)
    }
    keep <- seq_len(n)
    # Trailing NA: the last marker has nothing downstream of it.
    dist1 <- c(-step, NA)
  }
  cbind(probescx[keep, 1:2], dist1 = dist1)
}

# Apply neighbour_dist() to every chromosome and stack the results in the
# order of `groups`. Returns NULL when no chromosome contributes a row.
collect_dist <- function(groups, direction) {
  # unname() keeps rbind() from prefixing row names with the list names.
  pieces <- lapply(unname(groups), neighbour_dist, direction = direction)
  do.call(rbind, pieces)
}

####### Distance upstream #####
dist1.allup <- collect_dist(probes.split, "up")
## backup of the upstream object
dist1.allupback <- dist1.allup

### Distance downstream
dist1.alldown <- collect_dist(probes.split, "down")
## backup of the downstream object (signed, i.e. before the sign flip below)
dist1.alldownback <- dist1.alldown
## the accumulator variable ends up holding the signed downstream result;
## kept so that later code reading `dist1.all` still finds it
dist1.all <- dist1.alldownback
## Turn downstream distances into positive values
dist1.alldown$dist1 <- dist1.alldown$dist1 * -1
有没有更高效的实现思路，或者现成的工具可以推荐？谢谢！