-1

我的数据大致如下所示,我想为每个参与者(v001)计算一个变量,其中包含最后两个可用测量值(取自 lnslope1 到 lnslope9)之间的差值。每个受试者至少有两个测量值。

我的问题是:

我怎样才能在 R 中做到这一点?我已经阅读了有关 diff 函数的信息,但我不确定它是否可以在这里使用。我是否必须先把数据重组为长格式才能进行此计算?这是数据:

structure(list(v001 = c(10002, 10004, 10005, 10006, 10007, 10011, 
10012, 10018), lnslope1 = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_), lnslope2 = c(NA, NA, 
0.313091787977149, 0.800960043896479, NA, NA, 0, 0.246092484299754
), lnslope3 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), lnslope4 = c(NA, 0.218445030532656, 
NA, NA, NA, NA, 0.505548566665147, NA), lnslope5 = c(0.0507723253734231, 
NA, -0.0361572285993463, NA, -0.133531392624523, -0.0824189464154196, 
NA, -0.186877373329815), lnslope6 = c(0.606135803570316, NA, 
NA, NA, -0.0408887702539783, 0.304548524450922, NA, 0.099090902644231
), lnslope7 = c(0.192160005794242, NA, NA, 1.37147927533475, 
NA, 0.485507815781701, NA, 0.0307716586667537), lnslope8 = c(0.10951852580649, 
NA, NA, 1.53234783071453, 0.145860850410924, 0.604821224703469, 
NA, 0.0692660582117757), lnslope9 = c(0.374693449441411, NA, 
NA, 0.996237878364571, NA, 0.852777326151829, NA, 0.0299842570512681
)), .Names = c("v001", "lnslope1", "lnslope2", "lnslope3", "lnslope4", 
"lnslope5", "lnslope6", "lnslope7", "lnslope8", "lnslope9"), row.names = c(NA, 
8L), class = "data.frame")
4

2 回答 2

0

这是一种使用自定义函数和 apply 的迂回方式(test 是您的数据)。我喜欢这种方式,因为每一步都有明确的定义:

   # Finds the difference between the last two non-NA elements
find_difference <- function(row) {
  # Args:
  #   row: an atomic vector of measurements in chronological order; NA marks
  #        a missing measurement.
  # Returns:
  #   The last non-NA value minus the second-to-last non-NA value, or
  #   NA_real_ when fewer than two non-NA values are present.

  # Keep only the observed (non-NA) measurements, preserving their order
  observed <- row[!is.na(row)]
  n_obs <- length(observed)

  # A difference needs at least two observations. NA_real_ (not logical NA)
  # keeps the return type numeric even when no row has enough data.
  if (n_obs < 2L) {
    return(NA_real_)
  }

  observed[n_obs] - observed[n_obs - 1L]
}

# Apply find_difference to every row (MARGIN = 1) of the measurement columns.
# The columns are picked by name instead of by position (2:ncol(test)), so the
# ID column and the `diff` result column are never treated as measurements and
# the statement gives the same answer when it is run a second time.
slope_cols <- grep("^lnslope[0-9]+$", names(test), value = TRUE)
# apply() works on matrices; the coercion is made explicit here.
test$diff <- apply(
  as.matrix(test[, slope_cols, drop = FALSE]),
  MARGIN = 1,
  FUN = find_difference
)

结果:

   v001 lnslope1  lnslope2 lnslope3  lnslope4    lnslope5    lnslope6   lnslope7   lnslope8   lnslope9       diff
1 10002       NA        NA       NA        NA  0.05077233  0.60613580 0.19216001 0.10951853 0.37469345  0.2651749
2 10004       NA        NA       NA 0.2184450          NA          NA         NA         NA         NA         NA
3 10005       NA 0.3130918       NA        NA -0.03615723          NA         NA         NA         NA -0.3492490
4 10006       NA 0.8009600       NA        NA          NA          NA 1.37147928 1.53234783 0.99623788 -0.5361100
5 10007       NA        NA       NA        NA -0.13353139 -0.04088877         NA 0.14586085         NA  0.1867496
6 10011       NA        NA       NA        NA -0.08241895  0.30454852 0.48550782 0.60482122 0.85277733  0.2479561
7 10012       NA 0.0000000       NA 0.5055486          NA          NA         NA         NA         NA  0.5055486
8 10018       NA 0.2460925       NA        NA -0.18687737  0.09909090 0.03077166 0.06926606 0.02998426 -0.0392818
于 2018-01-03T17:45:14.797 回答
0

考虑以下基本 R 方法:

  • by(按 v001 分组对数据框进行切片),
  • tail(..., 2)(返回每组的最后两行),
  • sapply 和 diff()(遍历各列,计算这两行之间的差值)

数据 (v001组出现5次的随机数据)

# Reproducible random example: each of the 8 v001 ids appears 5 times
# (40 rows), with 9 uniform-random lnslope columns.
set.seed(101)
df <- data.frame(
  v001 = rep(
    c(10002, 10004, 10005, 10006, 10007, 10011, 10012, 10018),
    times = 5
  )
)
# One draw of 40 * 9 values filled column by column consumes the random
# stream in the same order as drawing 40 values nine times in a row.
df[paste0("lnslope", seq_len(9))] <- matrix(runif(40 * 9), nrow = 40, ncol = 9)

# BOTTOM 16 (last two per each v001)

tail(df, n = 16)
#     v001   lnslope1   lnslope2   lnslope3   lnslope4    lnslope5  lnslope6   lnslope7    lnslope8   lnslope9
# 25 10002 0.92331888 0.84324054 0.71833452 0.87557727 0.359044717 0.4376301 0.44279478 0.225614349 0.76226362
# 26 10004 0.79571976 0.71550340 0.33939503 0.16735989 0.008593605 0.1863236 0.89727380 0.561463483 0.92876406
# 27 10005 0.07121255 0.01908119 0.08122143 0.46907169 0.235711577 0.9454490 0.05384848 0.768000195 0.83164735
# 28 10006 0.38940777 0.30508025 0.03723433 0.65222974 0.106234733 0.1453450 0.66423805 0.633419973 0.17826814
# 29 10007 0.40645122 0.88294798 0.77308879 0.03446071 0.611033974 0.7789505 0.82934498 0.343626148 0.64859692
# 30 10011 0.65935508 0.94134682 0.99508226 0.43545232 0.204697003 0.8133834 0.75196439 0.198905340 0.67138895
# 31 10012 0.42334715 0.23441801 0.14658643 0.14976436 0.214610423 0.2201670 0.03444567 0.002378824 0.88840185
# 32 10018 0.32098445 0.93666583 0.03984487 0.45890584 0.016491745 0.3165820 0.64558797 0.849219944 0.18576634
# 33 10002 0.19773073 0.56670198 0.56567790 0.61886157 0.328194365 0.2641242 0.96276149 0.678308439 0.49316356
# 34 10004 0.16317009 0.84279040 0.88858587 0.95629334 0.269595276 0.5218472 0.36051843 0.203191312 0.13070689
# 35 10005 0.52331108 0.82130118 0.87072166 0.10063420 0.913872405 0.2031901 0.97042921 0.983532294 0.48476033
# 36 10006 0.91347865 0.27997285 0.98174943 0.22771539 0.417871804 0.6531599 0.49181493 0.752674565 0.60324084
# 37 10007 0.20677272 0.04730114 0.87974690 0.55501376 0.690565649 0.2772224 0.93198232 0.393392928 0.89320133
# 38 10011 0.81428302 0.22494063 0.51040174 0.77102964 0.900012892 0.4086745 0.60127222 0.090300324 0.64120345
# 39 10012 0.02016720 0.67309265 0.33437535 0.47966908 0.207708929 0.8367247 0.35175022 0.598136016 0.16957180
# 40 10018 0.92480441 0.95893086 0.61333531 0.88103731 0.461033160 0.4350532 0.08774474 0.676346199 0.01014118

输出

# For every v001 group, take its last two rows and compute, column by column,
# the change from the second-to-last row to the last row.
# The ID column is dropped by name rather than by position, and vapply()
# guarantees one numeric value per column: a group with fewer than two rows
# yields NA_real_ instead of a zero-length result that would break rbind().
diff_list <- by(df, df$v001, FUN = function(d) {
  last_two <- tail(d[setdiff(names(d), "v001")], 2)
  vapply(
    last_two,
    function(x) if (length(x) < 2L) NA_real_ else diff(x),
    numeric(1)
  )
})

# Stack the per-group named vectors into one matrix: one row per v001 value,
# one column per measurement.
diff_matrix <- do.call(rbind, diff_list)
diff_matrix
#         lnslope1    lnslope2   lnslope3   lnslope4     lnslope5   lnslope6   lnslope7    lnslope8   lnslope9
# 10002 -0.7255881 -0.27653857 -0.1526566 -0.2567157 -0.030850352 -0.1735058  0.5199667  0.45269409 -0.2691001
# 10004 -0.6325497  0.12728700  0.5491908  0.7889335  0.261001671  0.3355236 -0.5367554 -0.35827217 -0.7980572
# 10005  0.4520985  0.80221999  0.7895002 -0.3684375  0.678160829 -0.7422589  0.9165807  0.21553210 -0.3468870
# 10006  0.5240709 -0.02510740  0.9445151 -0.4245144  0.311637071  0.5078149 -0.1724231  0.11925459  0.4249727
# 10007 -0.1996785 -0.83564685  0.1066581  0.5205530  0.079531675 -0.5017281  0.1026373  0.04976678  0.2446044
# 10011  0.1549279 -0.71640619 -0.4846805  0.3355773  0.695315889 -0.4047089 -0.1506922 -0.10860502 -0.0301855
# 10012 -0.4031799  0.43867464  0.1877889  0.3299047 -0.006901494  0.6165577  0.3173046  0.59575719 -0.7188300
# 10018  0.6038200  0.02226503  0.5734904  0.4221315  0.444541415  0.1184712 -0.5578432 -0.17287374 -0.175625
于 2018-01-03T18:21:31.680 回答