1

I am working on a problem in Swirl (Week 2, R Programming Environment, Chapter 12: Data Manipulation).

Swirl wants me to make a table that looks exactly like this:

 Pclass   agecat    Sex      N     survivors   perc_survived
 <int>   <fctr>    <chr>   <int>     <int>         <dbl>
   1    Under 15  female     2         1        50.000000
   1    Under 15    male     3         3       100.000000
   1    15 to 50  female    70        68        97.142857
   1    15 to 50    male    72        32        44.444444
   1    Over 50   female    13        13       100.000000
   1    Over 50     male    26         5        19.230769

So far, I have:

>library(dplyr)
> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)
> 
> head (titanic_4)
# A tibble: 6 x 6
# Groups:   Pclass, agecat [3]
  Pclass   agecat    Sex     N survivors perc_survived
   <int>   <fctr>  <chr> <int>     <int>         <dbl>
1      1 Under 15 female     2         1     50.000000
2      1 Under 15   male     3         3    100.000000
3      1 15 to 50 female    70        68     97.142857
4      1 15 to 50   male    72        32     44.444444
5      1  Over 50 female    13        13    100.000000
6      1  Over 50   male    26         5     19.230769
>

Now, it looks like I need to figure out how to add another step to my pipe that will center-align all of the columns of the table.

So far, I have tried adding scale(center = TRUE, scale = TRUE) to the end of the pipe, which gave me the error "Error in colMeans(x, na.rm = TRUE) : 'x' must be numeric", because scale() only works on numeric, matrix-like data and my table also contains factor and character columns.

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     scale(center = TRUE, scale = TRUE)
Error in colMeans(x, na.rm = TRUE) : 'x' must be numeric
> 
> head (titanic_4)
# A tibble: 6 x 6
# Groups:   Pclass, agecat [3]
  Pclass   agecat    Sex     N survivors perc_survived
   <int>   <fctr>  <chr> <int>     <int>         <dbl>
1      1 Under 15 female     2         1     50.000000
2      1 Under 15   male     3         3    100.000000
3      1 15 to 50 female    70        68     97.142857
4      1 15 to 50   male    72        32     44.444444
5      1  Over 50 female    13        13    100.000000
6      1  Over 50   male    26         5     19.230769

Next, I tried adding format(justify=center) to the end of the pipe, but the result was not what I was looking for.

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     format(justify=center)
> print (titanic_4)
 [1] "# A tibble: 18 x 6"                                      "# Groups:   Pclass, agecat [9]"                         
 [3] "   Pclass   agecat    Sex     N survivors perc_survived" "    <int>   <fctr>  <chr> <int>     <int>         <dbl>"
 [5] " 1      1 Under 15 female     2         1    50.0000000" " 2      1 Under 15   male     3         3   100.0000000"
 [7] " 3      1 15 to 50 female    70        68    97.1428571" " 4      1 15 to 50   male    72        32    44.4444444"
 [9] " 5      1  Over 50 female    13        13   100.0000000" " 6      1  Over 50   male    26         5    19.2307692"
[11] " 7      2 Under 15 female    10        10   100.0000000" " 8      2 Under 15   male     9         9   100.0000000"
[13] " 9      2 15 to 50 female    61        56    91.8032787" "10      2 15 to 50   male    78         5     6.4102564"
[15] "11      2  Over 50 female     3         2    66.6666667" "12      2  Over 50   male    12         1     8.3333333"
[17] "13      3 Under 15 female    27        13    48.1481481" "14      3 Under 15   male    27         9    33.3333333"
[19] "15      3 15 to 50 female    74        33    44.5945946" "16      3 15 to 50   male   217        29    13.3640553"
[21] "17      3  Over 50 female     1         1   100.0000000" "18      3  Over 50   male     9         0     0.0000000"

That is also not what Swirl is looking for. Next, I tried format.data.frame(digits=6, justify= "centre", width=10):

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     format.data.frame(digits=6, justify= "centre", width=10)
> head (titanic_4)
      Pclass     agecat        Sex          N  survivors perc_survived
1          1  Under 15    female            2          1      50.00000
2          1  Under 15     male             3          3     100.00000
3          1  15 to 50    female           70         68      97.14286
4          1  15 to 50     male            72         32      44.44444
5          1  Over 50     female           13         13     100.00000
6          1  Over 50      male            26          5      19.23077

Next, I tried format(digits=6, justify= "centre", width=10):

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     format(digits=6, justify= "centre", width=10)
> head (titanic_4)
[1] "# A\n#   tibble:\n#   18 x\n#   6"             "# Groups:  \n#   Pclass,\n#   agecat\n#   [9]"
[3] "   Pclass"                                     "    <int>"                                    
[5] " 1      1"                                     " 2      1"   

Next, I tried format(justify= "centre"):

> titanic_4 <- titanic %>% 
+     select(Survived, Pclass, Age, Sex) %>%
+     filter(!is.na(Age)) %>%
+     mutate(agecat = cut(Age, breaks = c(0, 14.99, 50, 150), 
+                         include.lowest = TRUE,
+                         labels = c("Under 15", "15 to 50",
+                                    "Over 50"))) %>%
+     group_by(Pclass,agecat,Sex) %>%
+     summarize(N=n(), survivors = sum(Survived))%>%
+     mutate(perc_survived = survivors/N*100)%>%
+     format(justify= "centre")
> head (titanic_4)
[1] "# A tibble: 18 x 6"                                      "# Groups:   Pclass, agecat [9]"                         
[3] "   Pclass   agecat    Sex     N survivors perc_survived" "    <int>   <fctr>  <chr> <int>     <int>         <dbl>"
[5] " 1      1 Under 15 female     2         1    50.0000000" " 2      1 Under 15   male     3         3   100.0000000"

Any ideas about how I can get my columns to center-align like the table at the beginning of the question?

Thanks,

Drew

When I attempt to submit my answer, Swirl gives me a hint:

> submit()

| Sourcing your script...


| Keep trying!

| Check the example output in the comments of the script I opened for you. Make sure the columns in your output are in the same order as shown
| in the example output in the script comments. Try running your code in the console and printing out the result to determine whether it's
| successfully generating the desired final data frame before you submit it.

Suggestions from the comments section: Mr 42 noticed that my table contains row names while the Swirl table does not. I tried his suggestions for removing the row names, but none of them worked.

I tried to turn off the rownames in head:

> head(titanic_4,rownames=FALSE)
# A tibble: 6 x 6
# Groups:   Pclass, agecat [3]
  Pclass   agecat    Sex     N survivors perc_survived
   <int>   <fctr>  <chr> <int>     <int>         <dbl>
1      1 Under 15 female     2         1     50.000000
2      1 Under 15   male     3         3    100.000000
3      1 15 to 50 female    70        68     97.142857
4      1 15 to 50   male    72        32     44.444444
5      1  Over 50 female    13        13    100.000000
6      1  Over 50   male    26         5     19.230769

I also tried print:

> print( titanic_4, row.names=FALSE)
# A tibble: 18 x 6
# Groups:   Pclass, agecat [9]
   Pclass   agecat    Sex     N survivors perc_survived
    <int>   <fctr>  <chr> <int>     <int>         <dbl>
 1      1 Under 15 female     2         1    50.0000000
 2      1 Under 15   male     3         3   100.0000000
 3      1 15 to 50 female    70        68    97.1428571
 4      1 15 to 50   male    72        32    44.4444444
 5      1  Over 50 female    13        13   100.0000000
 6      1  Over 50   male    26         5    19.2307692

I also tried print.data.table:

> print.data.table(titanic_4,row.names=FALSE)
Error in print.data.table(titanic_4, row.names = FALSE) : 
  could not find function "print.data.table"
4

0 回答 0