3

我得到一个字符向量:

# Heights written as feet'inches" strings, one entry per person
tibble(
  H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\"")
)

我想将它转换为厘米。

请告知我该怎么做?

4

5 回答 5

2

使用 stringi 包分别提取英尺和英寸的数值:

library(stringi)

# Heights written as feet'inches" strings
Raw <- c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\"")

## Extract feet: one or more digits immediately followed by a '
## (the original line had a surplus closing parenthesis and did not parse)
Feet <- stri_extract_first_regex(Raw, "[[:digit:]]+(?=')")

## Extract inches: one or two digits immediately followed by a "
Inches <- stri_extract_first_regex(Raw, "[[:digit:]]{1,2}(?=\")")

## Combine feet and inches into a total number of inches (12 inches per foot)
TotalInches <- 12 * as.numeric(Feet) + as.numeric(Inches)

## Convert to cm (2.54 cm per inch)
CM <- 2.54 * TotalInches

print(CM)
# [1] 187.96 177.80 165.10 154.94 165.10 162.56

如果您需要对多个列执行此操作,将这些步骤定义为脚本顶部的函数可能是有意义的,这样您就可以更简洁地调用它并且不需要在全局环境中存储中间结果。

函数版本中还考虑了另一点:将未匹配到的部分(NA)替换为 0,这样只含英尺(如 1')或只含英寸(如 11")的有效测量值也能返回有效结果,而不是返回 NA。

#' Convert feet'inches" height strings to centimetres
#'
#' @param x Character vector of heights such as "5'10\"". An entry with only a
#'   feet part ("6'") or only an inches part ("11\"") is also accepted.
#' @return Numeric vector of heights in centimetres, the same length as `x`.
FtInToCm <- function(x) {
  # Parse the argument `x`; the original body read the global `Raw` instead,
  # so the function silently ignored whatever was passed in.
  Feet <- as.numeric(stringi::stri_extract_first_regex(x, "[[:digit:]]+(?=')"))
  Inches <- as.numeric(stringi::stri_extract_first_regex(x, "[[:digit:]]{1,2}(?=\")"))

  # A component that is absent from the string counts as 0 rather than NA
  Feet[is.na(Feet)] <- 0
  Inches[is.na(Inches)] <- 0

  # 12 inches per foot, 2.54 cm per inch
  2.54 * (12 * Feet + Inches)
}

# Example call on the Raw vector defined in the script above
FtInToCm(Raw)
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
于 2019-03-19T15:18:04.310 回答
2

有几种方法可以使用

1) 将各元素粘贴成单个字符串后用 fread 读取

library(data.table)
# Remove the first double quote from each height, join the values with
# newlines and hand the resulting string to fread(), which splits every row on
# the ' separator. The resulting columns are then multiplied by
# c(30.48, 2.54) (cm per foot, cm per inch) and summed via a matrix product.
# NOTE(review): df1 is not defined in this snippet; presumably it is the
# tibble from the question - confirm.
fread(paste(sub('"', "", df1$H), collapse="\n"), sep="'")[, 
               as.matrix(.SD) %*% c(30.48, 2.54)][,1]
 #[1] 187.96 177.80 165.10 154.94 165.10 162.56

2) 使用gsubfn

library(gsubfn)
# Drop the first double quote, then replace each feet'inches match with its
# value in centimetres. The feet group is (\\d+) rather than (\\d) so that a
# multi-digit feet value is captured whole instead of only its last digit.
# In the formula, x is the first capture group (feet), y the second (inches).
as.numeric(gsubfn("(\\d+)'(\\d+)", ~ as.numeric(x) * 30.48 + 
            as.numeric(y) * 2.54, sub('"', '', df1$H)))
 #[1] 187.96 177.80 165.10 154.94 165.10 162.56

3)separate

library(tidyverse)
# separate() splits H on runs of non-alphanumeric characters (its default
# separator) and, with convert = TRUE, turns the pieces into numbers.
# extra = "drop" discards any piece beyond the two requested ones without a
# warning; the trailing " is expected to leave such an extra empty piece.
df1 %>% 
    separate(H, into = c("H1", "H2"), convert = TRUE, extra = "drop") %>%
    # feet * 30.48 + inches * 2.54 gives centimetres
    transmute(H = H1 * 30.48 + H2 * 2.54)
# A tibble: 6 x 1
#      H
#  <dbl>
#1  188.
#2  178.
#3  165.
#4  155.
#5  165.
#6  163.

4)measurements

library(measurements)
library(tidyverse)
# Split H into numeric feet (H1) and inches (H2); extra = "drop" discards any
# piece beyond the two requested ones without a warning (the trailing " is
# expected to leave such an extra empty piece). conv_unit() then converts each
# part to centimetres.
df1 %>% 
   separate(H, into = c("H1", "H2"), convert = TRUE, extra = "drop") %>%
   transmute(H = conv_unit(H1, "ft", "cm") + conv_unit(H2, "inch", "cm"))
于 2019-03-19T15:17:48.700 回答
2

一种选择是提取所有数字并将其转换为矩阵,然后执行计算。

# Pull every run of digits out of each height; with simplify = TRUE the result
# is a character matrix with one row per height (feet in column 1, inches in
# column 2).
mat <- stringr::str_extract_all(df$H, "\\d+", simplify = TRUE)

# feet * 30.48 + inches * 2.54 gives centimetres
as.numeric(mat[, 1]) * 30.48 + as.numeric(mat[, 2]) * 2.54
#[1] 187.96 177.80 165.10 154.94 165.10 162.56

其中 mat 为

#     [,1] [,2]
#[1,] "6"  "2" 
#[2,] "5"  "10"
#[3,] "5"  "5" 
#[4,] "5"  "1" 
#[5,] "5"  "5" 
#[6,] "5"  "4" 

第一列是英尺,第二列是英寸。


只是出于我自己的好奇心,我想在基础 R 中解决这个问题

# Rewrite each height as "feet-inches", split on "-", then convert both parts
# to centimetres. vapply() with numeric(1) always yields a numeric vector,
# whereas sapply() returns list() when the input is empty.
vapply(strsplit(sub("(\\d+)'(\\d+).*", "\\1-\\2", df$H), "-"), function(x) 
       as.numeric(x[1]) * 30.48 + as.numeric(x[2]) * 2.54, numeric(1))

#[1] 187.96 177.80 165.10 154.94 165.10 162.56

这遵循类似的逻辑:先用 sub 从字符串中提取出 2 个数字,再用 strsplit 将它们拆开,然后把每个数字转换为数值并执行计算。

于 2019-03-19T15:17:56.023 回答
1

我添加了另一个答案,只是为了给你另一个选择,因为我在看到其他答案之前就已经写好了。

我先将字符串转换为数字,然后再转换单位:

library(dplyr)
library(stringr)
df <- tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))

# cm() is from grDevices (attached by default) and converts inches to
# centimetres, so feet are first expressed in inches (foot * 12).
df %>% 
  mutate(foot = str_extract(H, "^\\d+'"), 
         inch = str_extract(H, "\\d+\"$")) %>% # split foot from inch
  mutate(foot = as.numeric(str_remove(foot, "[^\\d]")),
         inch = as.numeric(str_remove(inch, "[^\\d]"))) %>% # convert to numeric
  mutate(H_new = cm(foot * 12) + cm(inch)) # convert units
# Printed result (commented out so the snippet can be pasted and run as is):
# A tibble: 6 x 4
#   H         foot  inch H_new
#   <chr>    <dbl> <dbl> <dbl>
# 1 "6'2\""      6     2  188.
# 2 "5'10\""     5    10  178.
# 3 "5'5\""      5     5  165.
# 4 "5'1\""      5     1  155.
# 5 "5'5\""      5     5  165.
# 6 "5'4\""      5     4  163.
于 2019-03-19T15:21:23.493 回答
1
> dat <- tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
> # Strip the double quote, split on ', then convert each pair:
> # feet * 30.48 + (inches / 12) * 30.48. The result is in centimetres, so the
> # column is named cm (it was misleadingly called inches before).
> dat$cm <- gsub("[\"]", "",dat$H) %>% 
    strsplit(., "'") %>% 
    lapply(., function(x) {
      x <- as.numeric(x);
      (x[1]*30.48) + (x[2]/12)*30.48
    }) %>% 
   unlist
> dat
# A tibble: 6 x 2
  H           cm
  <chr>    <dbl>
1 "6'2\""   188.
2 "5'10\""  178.
3 "5'5\""   165.
4 "5'1\""   155.
5 "5'5\""   165.
6 "5'4\""   163.

你甚至可以用 map_dbl 代替 lapply 和 unlist:

> # Same conversion as the lapply() version, but map_dbl() returns the numeric
> # vector directly, so no unlist step is needed.
> gsub("[\"]", "",dat$H) %>% 
    strsplit(., "'") %>% 
    map_dbl(function(x){
      # x holds the feet and inches parts of one height, as strings
      x <- as.numeric(x)
      # feet * 30.48 + (inches / 12) * 30.48 gives centimetres
      (x[1]*30.48) + (x[2]/12)*30.48
    })
[1] 187.96 177.80 165.10 154.94 165.10 162.56
于 2019-03-19T15:17:10.353 回答