1

我有以下数据,其中:
PseudoID = 受试者 ID,Trim_SSW = 孕期(T1、T2 或 T3),weight = 在 SSW 时间点测得的受试者体重,SSW = 孕周

# Example data as dput() output: a data.frame with 20 rows, one per measurement.
# Columns: PseudoID (subject ID), Trim_SSW (trimester label "T1"/"T2"/"T3"),
# weight (subject's weight at that measurement), SSW (gestational week).
# NOTE: the expression is not assigned to a name here.
structure(list(PseudoID = c(1001L, 1001L, 1001L, 1001L, 1001L, 
1001L, 1001L, 1001L, 1001L, 1001L, 1001L, 1001L, 1001L, 1002L, 
1002L, 1002L, 1002L, 1002L, 1002L, 1002L), Trim_SSW = c("T1", 
"T1", "T2", "T2", "T2", "T3", "T3", "T3", "T3", "T3", "T3", "T3", 
"T3", "T1", "T1", "T1", "T2", "T2", "T2", "T3"), weight = c(120.8, 
120.9, 120.8, 122.2, 122.5, 125, 124.6, 126.6, 126.4, 126.7, 
126.7, 128, 129.3, 101.7, 100.5, 100.2, 101.7, 104.2, 104.2, 
105.6), SSW = c(8L, 12L, 15L, 19L, 23L, 27L, 30L, 33L, 35L, 36L, 
37L, 38L, 39L, 9L, 10L, 13L, 18L, 22L, 25L, 29L)), .Names = c("PseudoID", 
"Trim_SSW", "weight", "SSW"), row.names = c(4L, 5L, 15L, 12L, 
17L, 16L, 11L, 6L, 3L, 1L, 2L, 18L, 20L, 46L, 47L, 49L, 42L, 
43L, 48L, 31L), class = "data.frame")

对于每个 ID,我想查看每个孕期的体重增加情况。因此,我需要分别在 T1、T2 和 T3 中求出最后一次测量与第一次测量之间的差值。如果某个孕期内只有 1 次测量,我希望填入 NA。我认为创建 3 个新变量(T1gain、T2gain、T3gain)会简化后续的计算。

我在这里看到了类似的帖子,但它没有完全回答我的问题,我被要求发一个新帖子。

4

2 回答 2

1

试试这个!我使用了 dplyr 和 data.table,这两个都是功能强大的包。我不确定您想要的确切输出格式,因此您可以比较 b 和 c 这两个输出,看看哪个满足您的需求。

# doing the calculations
library(dplyr)

# `a` is presumably the data frame from the question; it is not defined in
# this snippet, so assign the dput() output to `a` first.
# For each ID and trimester: last weight minus first weight, in row order.
# NOTE: a group holding a single measurement yields 0 here, not NA.
b <- a %>%
  group_by(PseudoID, Trim_SSW) %>%
  summarize(gain = last(weight) - first(weight))

# reshaping data from long to wide format
library(data.table)
# setDT() converts `b` to a data.table by reference, so `b` itself is
# converted as well as `c`.
c <- setDT(b)
c <- dcast(c, PseudoID ~ Trim_SSW, value.var = "gain")

# rename column names
names(c)[2:4] <- c("T1_gain", "T2_gain", "T3_gain")


# comparing the two outputs
> b
# A tibble: 6 x 3
# Groups:   PseudoID [?]
  PseudoID Trim_SSW  gain
     <int>    <chr> <dbl>
1     1001       T1   0.1
2     1001       T2   1.7
3     1001       T3   4.3
4     1002       T1  -1.5
5     1002       T2   2.5
6     1002       T3   0.0

> c
   PseudoID T1_gain T2_gain T3_gain
1:     1001     0.1     1.7     4.3
2:     1002    -1.5     2.5     0.0

当然,如果您更喜欢 b,请确保相应地更改各变量的类型(依次为因子、因子、数值)。

于 2017-11-10T09:37:59.480 回答
0
library(dplyr)
library(tidyr)

# Example data from the question: one row per weight measurement.
# PseudoID = subject ID, Trim_SSW = trimester label, weight = measured weight,
# SSW = gestational week. Row names are kept from the original dput() output.
dt <- structure(
  list(
    PseudoID = c(
      1001L, 1001L, 1001L, 1001L, 1001L, 1001L, 1001L, 1001L, 1001L, 1001L,
      1001L, 1001L, 1001L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L
    ),
    Trim_SSW = c(
      "T1", "T1", "T2", "T2", "T2", "T3", "T3", "T3", "T3", "T3",
      "T3", "T3", "T3", "T1", "T1", "T1", "T2", "T2", "T2", "T3"
    ),
    weight = c(
      120.8, 120.9, 120.8, 122.2, 122.5, 125, 124.6, 126.6, 126.4, 126.7,
      126.7, 128, 129.3, 101.7, 100.5, 100.2, 101.7, 104.2, 104.2, 105.6
    ),
    SSW = c(
      8L, 12L, 15L, 19L, 23L, 27L, 30L, 33L, 35L, 36L,
      37L, 38L, 39L, 9L, 10L, 13L, 18L, 22L, 25L, 29L
    )
  ),
  .Names = c("PseudoID", "Trim_SSW", "weight", "SSW"),
  row.names = c(
    4L, 5L, 15L, 12L, 17L, 16L, 11L, 6L, 3L, 1L,
    2L, 18L, 20L, 46L, 47L, 49L, 42L, 43L, 48L, 31L
  ),
  class = "data.frame"
)

# Weight gain per subject and trimester, reshaped to one row per subject
# with one "<trimester>_gain" column per trimester.
dt %>%
  group_by(PseudoID, Trim_SSW) %>%
  summarise(
    # last minus first weight in row order; NA when only one measurement exists
    Weight_gain = if (n() == 1) NA_real_ else last(weight) - first(weight)
  ) %>%
  ungroup() %>%                                   # drop remaining grouping
  mutate(Trim_SSW = paste0(Trim_SSW, "_gain")) %>%  # T1 -> T1_gain, etc.
  spread(Trim_SSW, Weight_gain)                   # long -> wide

# # A tibble: 2 x 4
#   PseudoID T1_gain T2_gain T3_gain
# *    <int>   <dbl>   <dbl>   <dbl>
# 1     1001     0.1     1.7     4.3
# 2     1002    -1.5     2.5      NA
于 2017-11-10T09:16:37.237 回答