3

我想知道当有两种不同的过滤标准时,如何将数据从一个数据帧提取到另一个数据帧中。

我的数据框 A 是

  TransactionNo       Date AccountNo TransactionType Amount Currency
1       1000001 2019-07-01     10001           Spend -12.44      SGD
2       1000002 2019-07-01     10001           Spend -31.92      CNY
3       1000003 2019-07-03     10001           Spend  -8.08      USD
4       1000004 2019-07-04     10001           Spend  -6.02      SGD
5       1000005 2019-07-05     10001           Spend  -5.04      USD
6       1000006 2019-07-06     10001           Spend  -8.43      SGD

我的数据框 B 是

  Date                  USD   CNY   SGD
  <dttm>              <dbl> <dbl> <dbl>
1 2019-07-01 00:00:00  1.35 0.198     1
2 2019-07-02 00:00:00  1.40 0.198     1
3 2019-07-03 00:00:00  1.36 0.197     1
4 2019-07-04 00:00:00  1.37 0.197     1
5 2019-07-05 00:00:00  1.38 0.197     1
6 2019-07-06 00:00:00  1.39 0.197     1

我想在数据框 A 中创建一个名为 Exchange 的新列。此新列中的条目可以取自数据框 B。所取的条目将取决于两个数据框中的货币匹配和日期匹配。例如,数据帧 A 中的第一行是 1。第二行是 0.198。第三行是 1.36。

最终代码看起来像

A <- mutate(A, Exchange = XXXXXXXXXXX)

我明白每个人在生活中都有自己的责任要履行,因此您的时间很宝贵。我非常感谢你们牺牲宝贵的时间和精力来帮助像我这样的新程序员学习基本编程。非常感谢大家。

4

2 回答 2

3

base R中,为行和列创建索引,cbind然后提取值

i1 <- match(as.Date(A$Date), as.Date(B$Date))
j1 <- match(A$Currency, names(B)[-1])
A$Exchange <- B[-1][cbind(i1, j1)]
A$Exchange
#[1] 1.000 0.198 1.360 1.000 1.380 1.000

或者使用tidyverse,我们将数据从“wide”重塑为“long”(“B”),然后进行连接

library(dplyr)
library(tidyr)
B %>%
  mutate(Date = as.Date(Date)) %>% 
  pivot_longer(cols = -Date, names_to = 'Currency', values_to = 'Exchange') %>% 
  right_join(A %>%
               mutate(Date = as.Date(Date)))
# A tibble: 6 x 7
#  Date       Currency Exchange TransactionNo AccountNo TransactionType Amount
#  <date>     <chr>       <dbl>         <int>     <int> <chr>            <dbl>
#1 2019-07-01 SGD         1           1000001     10001 Spend           -12.4 
#2 2019-07-01 CNY         0.198       1000002     10001 Spend           -31.9 
#3 2019-07-03 USD         1.36        1000003     10001 Spend            -8.08
#4 2019-07-04 SGD         1           1000004     10001 Spend            -6.02
#5 2019-07-05 USD         1.38        1000005     10001 Spend            -5.04
#6 2019-07-06 SGD         1           1000006     10001 Spend            -8.43

数据

A <- structure(list(TransactionNo = 1000001:1000006, Date = c("2019-07-01", 
"2019-07-01", "2019-07-03", "2019-07-04", "2019-07-05", "2019-07-06"
), AccountNo = c(10001L, 10001L, 10001L, 10001L, 10001L, 10001L
), TransactionType = c("Spend", "Spend", "Spend", "Spend", "Spend", 
"Spend"), Amount = c(-12.44, -31.92, -8.08, -6.02, -5.04, -8.43
), Currency = c("SGD", "CNY", "USD", "SGD", "USD", "SGD")),
class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6"))

B <- structure(list(Date = c("2019-07-01 00:00:00", "2019-07-02 00:00:00", 
"2019-07-03 00:00:00", "2019-07-04 00:00:00", "2019-07-05 00:00:00", 
"2019-07-06 00:00:00"), USD = c(1.35, 1.4, 1.36, 1.37, 1.38, 
1.39), CNY = c(0.198, 0.198, 0.197, 0.197, 0.197, 0.197), SGD = c(1L, 
1L, 1L, 1L, 1L, 1L)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6"))
于 2019-10-15T06:27:29.673 回答
1

不如 Akrun 高效,而是另一种 Base R 解决方案:

# Reshape B from wide to long: 

    long_B <-
      data.frame(
        reshape(
          B,
          direction = "long",
          varying = names(B)[names(B) != "Date"],
          v.names = "Exchange",
          idvar = "Date",
          timevar = "Currency",
          times = names(B)[names(B) != "Date"]
        ),
        row.names = NULL
      )

# Left join on intersecting vectors: 

a_left_join_b <-
  merge(A,
        long_B,
        by = intersect(colnames(A), colnames(long_B)),
        all.x = TRUE)

一个整洁的替代方案:

ab <- 

  B %>% 

  gather(key = "Currency", value = "Exchange", -Date) %>% 

  right_join(A, by = intersect(colnames(.), colnames(A)), all.y = TRUE)

数据:

A <-
  structure(
    list(
      TransactionNo = 1000001:1000006,
      Date = as.POSIXct(
        c(
          "2019-07-01",
          "2019-07-01",
          "2019-07-03",
          "2019-07-04",
          "2019-07-05",
          "2019-07-06"
        ),
        tz = Sys.timezone()
      ),
      AccountNo = c(10001L, 10001L, 10001L, 10001L, 10001L, 10001L),
      TransactionType = c("Spend", "Spend", "Spend", "Spend", "Spend",
                          "Spend"),
      Amount = c(-12.44, -31.92, -8.08, -6.02, -5.04, -8.43),
      Currency = c("SGD", "CNY", "USD", "SGD", "USD", "SGD")
    ),
    class = "data.frame",
    row.names = c("1",
                  "2", "3", "4", "5", "6")
  )

B <-
  structure(
    list(
      Date = as.POSIXct(
        c(
          "2019-07-01 00:00:00",
          "2019-07-02 00:00:00",
          "2019-07-03 00:00:00",
          "2019-07-04 00:00:00",
          "2019-07-05 00:00:00",
          "2019-07-06 00:00:00"
        ),
        tz = Sys.timezone()
      ),
      USD = c(1.35, 1.4, 1.36, 1.37, 1.38,
              1.39),
      CNY = c(0.198, 0.198, 0.197, 0.197, 0.197, 0.197),
      SGD = c(1L,
              1L, 1L, 1L, 1L, 1L)
    ),
    class = "data.frame",
    row.names = c("1",
                  "2", "3", "4", "5", "6")
  )
于 2019-10-15T09:45:17.990 回答