-1

我的(虚构的)数据:

# Fictional oxygen-consumption data: 4 animals (A-D), 7 measurements each.
# Rows cycle through the animals in the order A, B, C, D, so every line of
# four values below is one measurement per animal. Animals A and B are
# labelled "clean", C and D "dirty".
dat <- data.frame(
  animal = factor(rep(c("A", "B", "C", "D"), times = 7L)),
  oxygen = c(
    25L, 24L, 28L, 30L,
    25L, 30L, 28L, 27L,
    20L, 22L, 20L, 27L,
    26L, 24L, 26L, 22L,
    30L, 25L, 26L, 28L,
    27L, 30L, 27L, 28L,
    28L, 20L, 23L, 29L
  ),
  time = c(
    49L, 33L, 2L, 22L,
    15L, 22L, 49L, 40L,
    11L, 2L, 24L, 48L,
    32L, 18L, 39L, 46L,
    6L, 24L, 26L, 40L,
    26L, 26L, 1L, 36L,
    4L, 17L, 50L, 24L
  ),
  habitat = factor(rep(c("clean", "clean", "dirty", "dirty"), times = 7L))
)

变量解释:

动物:有 4 只动物(A、B、C 和 D)进行了耗氧量测试。

氧气:耗氧率;对每只动物进行多次测量。

时间:自机器开始测量耗氧量以来的时间(以分钟为单位)。

栖息地:表示采集动物的栖息地条件;清洁(clean)或肮脏(dirty,受污染)的栖息地。

我想(通过 t 检验)检验来自干净栖息地和肮脏(受污染)栖息地的动物之间的平均耗氧率是否不同。但是,我想把分析限定在每只动物耗氧率最低的三分之一、且测量时间在 5 到 48 分钟之间的数据上。

谁能提供一段 R 代码,对我的数据取子集,使其只包含每只动物耗氧率最低的三分之一、且时间在 5–48 分钟之间的记录?

我正在尝试这样的事情,但是下面的代码并没有做我想要的(我认为它的作用是它从所有数据中选择最低的三分之一,而不是每只动物的最低三分之一):

# Keep rows inside the 5-48 minute window whose oxygen is at or below the
# 1/3 quantile. quantile() is evaluated here over all rows of dat, not per
# animal. The upper bound was previously written `time >=48`, which kept
# only rows at or after minute 48; it is now `time <= 48`.
newdat <- subset(
  dat,
  oxygen <= quantile(oxygen, 1/3) & time >= 5 & time <= 48
)
4

3 回答 3

2

可以这样做:

library(plyr)

# Split dat by animal; within each animal's rows keep the measurements whose
# oxygen is at or below that animal's 1/3 quantile (computed over all of the
# animal's rows) and whose time lies in the closed window [5, 48]. ddply()
# then combines the per-animal pieces into one data frame.
newdat <- ddply(
  dat,
  "animal",
  function(grp) {
    subset(grp, oxygen <= quantile(oxygen, 1/3) & time >= 5 & time <= 48)
  }
)

##    animal oxygen time habitat
## 1       A     25   15   clean
## 2       A     20   11   clean
## 3       B     24   33   clean
## 4       B     24   18   clean
## 5       B     20   17   clean
## 6       C     20   24   dirty
## 7       C     26   39   dirty
## 8       C     26   26   dirty
## 9       D     27   40   dirty
## 10      D     27   48   dirty
## 11      D     22   46   dirty
于 2013-10-18T03:30:42.643 回答
1

编辑,我之前误解了你的问题

library(data.table)

# Convert to a data.table (this rebinds `dat`).
dat <- data.table(dat)

# Within the strict window 5 < time < 48 (the endpoints 5 and 48 are
# excluded), flag per animal the rows whose oxygen is at or below that
# animal's 1/3 quantile. The quantile is computed from the in-window rows
# only. `:=` adds the LowestOneThird column to dat by reference; rows outside
# the window are not assigned a flag.
dat[
  time > 5 & time < 48,
  LowestOneThird := oxygen <= quantile(oxygen, 1/3),
  by = "animal"
]

# Keep only the rows whose flag is TRUE.
subsetted <- dat[LowestOneThird == TRUE]

输出:

    > subsetted
   animal oxygen time habitat LowestOneThird
1:      A     20   11   clean           TRUE
2:      A     25   15   clean           TRUE
3:      B     20   17   clean           TRUE
4:      B     24   18   clean           TRUE
5:      B     24   33   clean           TRUE
6:      C     20   24   dirty           TRUE
7:      D     27   40   dirty           TRUE
8:      D     22   46   dirty           TRUE
于 2013-10-18T03:26:58.750 回答
0

您可以使用基础 R 中的 `by`,再配合 `do.call(rbind, ...)`:

# Split dat by animal and, for each animal, keep the rows whose oxygen is at
# or below that animal's 1/3 quantile and whose time lies in [5, 48]. The
# result is a `by` object holding one data frame per animal; the index list is
# named so the group label stays "animal".
dat1 <- by(
  dat,
  list(animal = dat$animal),
  function(grp) {
    subset(grp, oxygen <= quantile(oxygen, 1/3) & time >= 5 & time <= 48)
  }
)
> dat1
animal: A
  animal oxygen time habitat
5      A     25   15   clean
9      A     20   11   clean
----------------------------------------------------------------------------------------------------------- 
animal: B
   animal oxygen time habitat
2       B     24   33   clean
14      B     24   18   clean
26      B     20   17   clean
----------------------------------------------------------------------------------------------------------- 
animal: C
   animal oxygen time habitat
11      C     20   24   dirty
15      C     26   39   dirty
19      C     26   26   dirty
----------------------------------------------------------------------------------------------------------- 
animal: D
   animal oxygen time habitat
8       D     27   40   dirty
12      D     27   48   dirty
16      D     22   46   dirty


# Stack the per-animal data frames held in dat1 into a single data frame.
do.call(rbind,dat1)
     animal oxygen time habitat
A.5       A     25   15   clean
A.9       A     20   11   clean
B.2       B     24   33   clean
B.14      B     24   18   clean
B.26      B     20   17   clean
C.11      C     20   24   dirty
C.15      C     26   39   dirty
C.19      C     26   26   dirty
D.8       D     27   40   dirty
D.12      D     27   48   dirty
D.16      D     22   46   dirty
于 2013-10-18T04:00:59.417 回答