0

在 SQL 中,您可以编写如下语句:

SELECT *
FROM table
WHERE (var1, var2, var3, var4) IN (("var1-1", "var2-1", "var3-1", "var4-1"),
                                   ("var1-2", "var2-2", "var3-2", "var4-2"))

这意味着抓取所有行,其中 (var1 == "var1-1" and var2 == "var2-1" and var3 == "var3-1" and var4 == "var4-1") or (var1 == " var1-2" 和 var2 == "var2-2" 和 var3 == "var3-2" 和 var4 == "var4-2")

有没有办法以编程方式在 dbplyr 中进行类似的查询?

例如,假设我有一个小标题:

tribble(
    ~var1,     ~var2,    ~var3,    ~var4,
    "var1-1",  "var2-1", "var3-1", "var4-1",
    "var1-2",  "var2-2", "var3-2", "var4-2"
  )

是否有某种函数可以让 dbplyr 构建一个像上面那样的 SQL 语句?

4

2 回答 2

3

1)使用inner_join

library(dplyr)

# test data
v <- paste0("var", 1:4)
DF1 <- as.data.frame(t(outer(v, 1:3, paste, sep = "-")), stringsAsFactors = FALSE)
DF2 <- as.data.frame(t(outer(v, 2:4, paste, sep = "-")), stringsAsFactors = FALSE)

DF1 %>% inner_join(DF2)

给予:

Joining, by = c("V1", "V2", "V3", "V4")
# A tibble: 2 x 4
  V1     V2     V3     V4    
  <chr>  <chr>  <chr>  <chr> 
1 var1-2 var2-2 var3-2 var4-2
2 var1-3 var2-3 var3-3 var4-3

2)在基础 R 中,我们可以使用merge

merge(DF1, DF2)

或者intersect

intersect(DF1, DF2)

3)在 dbplyr 中:

library(dbplyr)

# set up backend using DF1 and DF2 from (1)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, DF1, "DF1")
copy_to(con, DF2, "DF2")

DF1_db <- tbl(con, "DF1")
DF2_db <- tbl(con, "DF2")
DF1_db %>% inner_join(DF2_db)

给予:

Joining, by = c("V1", "V2", "V3", "V4")
# Source:   lazy query [?? x 4]
# Database: sqlite 3.19.3 []
  V1     V2     V3     V4    
  <chr>  <chr>  <chr>  <chr> 
1 var1-2 var2-2 var3-2 var4-2
2 var1-3 var2-3 var3-3 var4-3

如果您有一个 tibble 和一个数据库表,您将需要使用将 tibble 复制到数据库copy_to或将数据库表抓取到 R inner_join中。不能混合源。

于 2018-05-15T20:51:28.423 回答
2

使用一些高级的 R 编程purrrrlang,你应该能够为它创建一个表达式filter(),我认为,做你所要求的确切的事情,这里是一个使用mtcars. 我tribble使用适用于该示例的数据重新创建了您的数据。我还添加了内联注释来帮助解释每个步骤的作用:

library(dbplyr, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(purrr, warn.conflicts = FALSE)
library(DBI, warn.conflicts = FALSE)
library(rlang, warn.conflicts = FALSE)

con <- DBI::dbConnect(RSQLite::SQLite(), path = ":dbname:")
db_mtcars <- copy_to(con, mtcars)

filters <- tribble(
  ~gear, ~carb,    
  4,     4,
  3,     1
)
# transpose() converts rows into lists entries
filter_expr <- transpose(filters) %>%
  # iterate through each row
  map(~{
    # iterate through each pair ie: gear == 4
    imap(.x, ~expr(!!sym(.y) == !!.x)) %>%
      # creates one call in the row by 
      # "collapsing" all pairs, separating them with &
      reduce(function(x, y) expr((!!x & !!y)))
    }) %>%
  # creates one call collapsing all row calls 
  # into a single one, separating them with |
  reduce(function(x, y ) expr(!!x | !! y))
# the resulting call
filter_expr
#> (gear == 4 & carb == 4) | (gear == 3 & carb == 1)

filtered_mtcars <- db_mtcars %>%
  # Use bang-bang (!!) to execute the new call
  filter(!! filter_expr)

filtered_mtcars
#> # Source:   lazy query [?? x 11]
#> # Database: sqlite 3.22.0 []
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
#> 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
#> 3  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
#> 4  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
#> 5  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
#> 6  17.8     6  168.   123  3.92  3.44  18.9     1     0     4     4
#> 7  21.5     4  120.    97  3.7   2.46  20.0     1     0     3     1

show_query(filtered_mtcars)
#> <SQL>
#> SELECT *
#> FROM `mtcars`
#> WHERE ((`gear` = 4.0 AND `carb` = 4.0) OR (`gear` = 3.0 AND `carb` = 1.0))

dbDisconnect(con)
于 2018-09-22T22:44:44.170 回答