0

例子

以下是个体 id = 1 的一些数据:

id time status
--------------
1  t    status

t 是某个事件发生的时间;status 在事件发生时为 1,未发生时为 0(在事件未发生的情况下,t 是研究的持续时间)。

假设 t 介于 a2 和 a3 之间。

我的目标是将我的数据转换为以下内容:

id period start stop status
---------------------------
1  1     0     a1   0      
1  2     a1    a2   0      
1  3     a2    t    status  

个体 1 的总时间被分为三个区间,其中在 (0, a1) 和 (a1, a2) 这两个区间内没有发生事件。

问题

你能想出一种有效的方法来编写一个输入数据集和向量a=(a1, a2, ..., aK)并输出转换后的数据集的 R 函数吗?


编辑

第 1 部分:有人要求我给出一个具体的例子。例子如下:

    id time status
    --------------
    1  5    1

a1=1, a2=3, a3=7.

第 2 部分:还有人要求我展示我的尝试。代码如下:

> data <- data.frame(id=1, time=5, status=1)
> a <- c(1, 3, 7)
> N <- nrow(data)
> data$period <- ifelse(data$time < a[1], 1,
+                       ifelse(data$time < a[2], 2,
+                              ifelse(data$time < a[3], 3, 4)))
> 
> 
> dataTemp1 <- data.frame(matrix(nrow=N, ncol=ncol(data)))
> names(dataTemp1) <- names(data)
> dataTemp2 <- data.frame(matrix(nrow=N, ncol=ncol(data)))
> names(dataTemp2) <- names(data)
> dataTemp3 <- data.frame(matrix(nrow=N, ncol=ncol(data)))
> names(dataTemp3) <- names(data)
> dataTemp4 <- data.frame(matrix(nrow=N, ncol=ncol(data)))
> names(dataTemp4) <- names(data)
> 
> for(j in 1:N)
+ {
+   if(data[j, "period"] == 1){
+     data[j, "start"] <- 0
+     data[j, "stop"] <- data[j, "time"]
+   } else if(data[j, "period"] == 2){
+     dataTemp1[j, c("id", "time", "period")] <-
+       data[j, c("id", "time", "period")]
+     dataTemp1[j, "start"] <- 0
+     dataTemp1[j, "stop"] <- a[1]
+     dataTemp1[j, "status"] <- 0
+     
+     data[j, "start"] <- a[1]
+     data[j, "stop"] <- data[j, "time"] 
+   } else if(data[j, "period"] == 3){
+     dataTemp1[j, c("id", "time", "period")] <-
+       data[j, c("id", "time", "period")]
+     dataTemp1[j, "start"] <- 0
+     dataTemp1[j, "stop"] <- a[1]
+     dataTemp1[j, "status"] <- 0
+     
+     dataTemp2[j, c("id", "time", "period")] <-
+       data[j, c("id", "time", "period")]
+     dataTemp2[j, "start"] <- a[1]
+     dataTemp2[j, "stop"] <- a[2]
+     dataTemp2[j, "status"] <- 0
+     
+     data[j, "start"] <- a[2]
+     data[j, "stop"] <- data[j, "time"]     
+   } else if(data[j, "period"] == 4){
+     dataTemp1[j, c("id", "time", "period")] <-
+       data[j, c("id", "time", "period")]
+     dataTemp1[j, "start"] <- 0
+     dataTemp1[j, "stop"] <- a[1]
+     dataTemp1[j, "status"] <- 0
+     
+     dataTemp2[j, c("id", "time", "period")] <-
+       data[j, c("id", "time", "period")]
+     dataTemp2[j, "start"] <- a[1]
+     dataTemp2[j, "stop"] <- a[2]
+     dataTemp2[j, "status"] <- 0
+     
+     dataTemp3[j, c("id", "time", "period")] <-
+       data[j, c("id",  "time", "period")]
+     dataTemp3[j, "start"] <- a[2]
+     dataTemp3[j, "stop"] <- a[3]
+     dataTemp3[j, "status"] <- 0
+     
+     data[j, "start"] <- a[3]
+     data[j, "stop"] <- data[j, "time"] 
+   }
+ }
> 
> dataTemp1 <- dataTemp1[complete.cases(dataTemp1), ]
> dataTemp2 <- dataTemp2[complete.cases(dataTemp2), ]
> dataTemp3 <- dataTemp3[complete.cases(dataTemp3), ]
> dataTemp4 <- dataTemp4[complete.cases(dataTemp4), ]
> 
> data <- rbind(data, dataTemp1, dataTemp2, dataTemp3, dataTemp4)
> data[, "period"] <- ifelse(data[, "start"] == 0, 1,
+                            ifelse(data[, "start"] == a[1], 2,
+                                   ifelse(data[, "start"] == a[2], 3,
+                                          ifelse(data[, "start"] == a[3], 4,
+                                                 5))))
> data <- data[order(data$id, data$start),
+              c("id", "period", "start", "stop", "status")]
> data
  id period start stop status
2  1      1     0    1      0
3  1      2     1    3      0
1  1      3     3    5      1
4

1 回答 1

0

我会把它写成一个适当的可重现的解决方案:

df <- data.frame( id=1, time=5, status=2)
a <- c(1, 3, 7)

# Split each subject's follow-up time into consecutive intervals at the
# cut points in `a` (counting-process / "start-stop" format).
#
# Arguments:
#   df  data frame with one row per subject and columns `id`, `time`
#       (event or censoring time) and `status` (event indicator).
#   a   numeric vector of cut points. Non-positive, duplicated and NA
#       values are ignored; the order does not matter.
#
# Returns a data frame with columns `id`, `period`, `start`, `stop`,
# `status`. Each subject contributes one row per interval up to its own
# `time`: only cut points strictly below `time` split the follow-up, every
# interval except the last has status 0, and the last interval ends at
# `time` and carries the subject's original `status`.
res.fn <- function(df, a) {
    stopifnot(
        is.data.frame(df),
        all(c("id", "time", "status") %in% names(df)),
        is.numeric(a),
        !anyNA(df$time)
    )

    # Nothing to split: return an empty frame with the expected columns.
    if (nrow(df) == 0L) {
        return(data.frame(
            id = df$id,
            period = integer(0),
            start = numeric(0),
            stop = numeric(0),
            status = df$status
        ))
    }

    # sort() drops NA cut points; a cut at or below 0 cannot split (0, time].
    cuts <- sort(unique(a[a > 0]))

    pieces <- lapply(seq_len(nrow(df)), function(i) {
        end <- df$time[i]
        # Only cut points strictly inside (0, end) create extra intervals.
        inner <- cuts[cuts < end]
        n <- length(inner) + 1L
        data.frame(
            id = rep(df$id[i], n),
            period = seq_len(n),
            start = c(0, inner),
            stop = c(inner, end),
            # The event (if any) can only happen in the final interval.
            status = c(rep(0, n - 1L), df$status[i])
        )
    })

    out <- do.call(rbind, pieces)
    rownames(out) <- NULL
    out
}
> res.fn(df, a)

  id period start stop status
1  1      1     0    1      0
2  1      2     1    3      0
3  1      3     3    5      2
于 2013-01-15T15:44:08.517 回答