0

I have a column of hours, and I need to group it up into a new column.

# Example data: one numeric column of walking hours (13 rows). It is bound to
# `stuff` because the snippets below call grouphours(stuff) and read
# stuff$Q11aWalkHoursEdit.SS.
stuff <- data.frame(
  Q11aWalkHoursEdit.SS = c(0, 1, 2, 3, 4, 5, 6, 6, 7, 2.5, 6, 1.9, 0.5)
)

I initially thought I'd use if, as follows:

# First attempt: label the hours column with a scalar if / else if chain.
#
# data: a data frame with a numeric column "Q11aWalkHoursEdit.SS".
# Returns `data` with an added column "Q11aWalkHours.SS".
#
# NOTE: this does not produce per-row labels. data["Q11aWalkHoursEdit.SS"] is
# a one-column data frame, so each comparison yields one logical value per row,
# while `if` expects a single condition. R reports "the condition has
# length > 1 and only the first element will be used" (newer R versions are
# reported to raise an error here instead - confirm for your version).
grouphours <- function(data){
  # Start with an all-NA label column.
  data$Q11aWalkHours.SS <- NA
  # Only the first row's value decides which branch runs.
  if(data["Q11aWalkHoursEdit.SS"] >= 5){
    # Each assignment below overwrites the ENTIRE column with a single label.
    data["Q11aWalkHours.SS"] = "5 hours +"
  } else if(data["Q11aWalkHoursEdit.SS"] > 4){
    data["Q11aWalkHours.SS"] = "4-5 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] > 3){
    data["Q11aWalkHours.SS"] = "3-4 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] > 2){
    data["Q11aWalkHours.SS"] = "2-3 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] > 1){
    data["Q11aWalkHours.SS"] = "1-2 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] > 0){
    data["Q11aWalkHours.SS"] = "0-1 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] == 0){
    data["Q11aWalkHours.SS"] = "0 hours"
  } else {
    data["Q11aWalkHours.SS"] = NA
  }
  return(data)    
}
# `stuff` is the example data frame shown at the top of the question.
test <- grouphours(stuff)

But this doesn't work, because if is not vectorized: it expects a single TRUE/FALSE condition, not a whole column. It gives the following warning:

1: In if (data["Q11aWalkHoursEdit.SS"] >= 5) { ... :
  the condition has length > 1 and only the first element will be used

I then started writing a for loop, which also doesn't work, and I don't really understand why:

# Second attempt: the same if / else if chain wrapped in a for loop.
#
# data: a data frame with a numeric column "Q11aWalkHoursEdit.SS".
# Returns `data` with an added column "Q11aWalkHours.SS".
#
# NOTE: the loop does not fix the problem. The index `i` is never used inside
# the body, so every iteration repeats the same whole-column comparison and
# whole-column assignment as the version without the loop.
grouphours <- function(data){
  # Start with an all-NA label column.
  data$Q11aWalkHours.SS <- NA
  # The row count is read from the global `stuff`, not from the `data` argument.
  l<-length(stuff$Q11aWalkHoursEdit.SS)
  for(i in 1:l){
  # No [i] subscript below: the condition is still the full column, not row i.
  if(data["Q11aWalkHoursEdit.SS"] >= 5){
    # Each assignment overwrites the ENTIRE label column, not just row i.
    data["Q11aWalkHours.SS"] = "5 hours +"
  } else if(data["Q11aWalkHoursEdit.SS"] > 4){
    data["Q11aWalkHours.SS"] = "4-5 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] > 3){
    data["Q11aWalkHours.SS"] = "3-4 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] > 2){
    data["Q11aWalkHours.SS"] = "2-3 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] > 1){
    data["Q11aWalkHours.SS"] = "1-2 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] > 0){
    data["Q11aWalkHours.SS"] = "0-1 hours"
  } else if(data["Q11aWalkHoursEdit.SS"] == 0){
    data["Q11aWalkHours.SS"] = "0 hours"
  } else {
    data["Q11aWalkHours.SS"] = NA
  }
  }
  return(data)    
}
# `stuff` is the example data frame shown at the top of the question.
test <- grouphours(stuff)

Regardless, I feel like I shouldn't need to use a loop - based on comments in this question: Warning "the condition has length > 1 and only the first element will be used" But I'm having trouble understanding how to apply those responses to my situation.

EDIT:

Thank you, everyone, for your help. I used the following code, which works perfectly. I think cut would be a 'better' solution, but I don't understand it yet, so I will use ifelse for now.

# Label each row's walking hours. Thresholds are checked from the top down and
# the first one reached decides the label; values not greater than 0 get "Zero".
stuff$test <- with(stuff,
  ifelse(Q11aWalkHoursEdit.SS >= 5, "Five +",
  ifelse(Q11aWalkHoursEdit.SS >= 4, "Four to five",
  ifelse(Q11aWalkHoursEdit.SS >= 3, "Three to four",
  ifelse(Q11aWalkHoursEdit.SS >= 2, "Two to three",
  ifelse(Q11aWalkHoursEdit.SS >= 1, "One to two",
  ifelse(Q11aWalkHoursEdit.SS > 0, "0 to one", "Zero"))))))
)
4

2 回答 2

3

Use cut:

# Bin the first column into right-closed intervals with breaks at 0, 1, ..., 5;
# the -Inf / Inf end breaks catch everything at or below 0 and above 5.
DF$interval <- cut(DF[[1]], breaks = c(-Inf, 0:5, Inf))

The only problem is the condition >= 5, which requires a manual correction:

# Rename the two top levels first so the labels describe the corrected bins:
# level 6 becomes the open interval (4,5) and level 7 the closed-left [5, Inf].
levels(DF$interval)[6:7] <- c("(4,5)", "[5, Inf]")
# cut() put exactly-5 in level 6; move it to the top bin. Assigning the label
# set above avoids depending on the exact text of cut()'s auto-generated
# labels (e.g. the padding space in "(5, Inf]").
DF$interval[DF[, 1] == 5] <- "[5, Inf]"

   Q11aWalkHoursEdit.SS interval
1                   0.0 (-Inf,0]
2                   1.0    (0,1]
3                   2.0    (1,2]
4                   3.0    (2,3]
5                   4.0    (3,4]
6                   5.0 [5, Inf]
7                   6.0 [5, Inf]
8                   6.0 [5, Inf]
9                   7.0 [5, Inf]
10                  2.5    (2,3]
11                  6.0 [5, Inf]
12                  1.9    (1,2]
13                  0.5    (0,1]
于 2013-10-04T13:42:59.627 回答
0

You can also try ceiling and then update the range, although I'm not sure what interval you want "5" placed in...

# Bucket hours by rounding up to the next whole hour, so a value h falls in
# the right-closed interval (ceiling(h) - 1, ceiling(h)].
x <- data.frame(hrs = c(0, 1, 2, 3, 4, 5, 6, 6, 7, 2.5, 6, 1.9, 0.5))
# Upper bound of each row's bucket, capped at 6 so that everything above 5
# shares a single top bucket.
upper <- pmin(ceiling(x$hrs), 6)
x$y <- ifelse(upper == 0, "0 hours", paste0(upper - 1, "-", upper, " hours"))
# The capped top bucket is open-ended. Note that exactly 5 rounds up to 5 and
# therefore stays in "4-5 hours".
x$y[x$y == "5-6 hours"] <- "5 hours+"
x

   hrs         y
1  0.0   0 hours
2  1.0 0-1 hours
3  2.0 1-2 hours
4  3.0 2-3 hours
5  4.0 3-4 hours
6  5.0 4-5 hours
7  6.0  5 hours+
8  6.0  5 hours+
9  7.0  5 hours+
10 2.5 2-3 hours
11 6.0  5 hours+
12 1.9 1-2 hours
13 0.5 0-1 hours
于 2013-10-04T15:32:03.240 回答