5

我正在尝试将 R + MonetDB 用作大数据分析堆栈,但在创建新列并用我的分析中的数据填充它时遇到了麻烦。这是一个玩具示例:

# Attach the MonetDBLite package and the DBI package used by the calls below.
library(MonetDBLite)
library(DBI)

# Load the mtcars data set and open a connection to the embedded database
# stored under ./test.db.
data(mtcars)
db <- dbConnect(MonetDB.R::MonetDB(), embedded="./test.db")

# Load mtcars into the database, overwriting a table of the same name.
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite=TRUE)

# Add a new DOUBLE column named v1 to the table.
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")


# Try to insert data computed in R into the new column.
# Note: mtcars["mpg"] keeps the data-frame container, so v1 is a one-column
# data frame rather than a plain numeric vector.
v1 <- mtcars["mpg"] * pi
# This is the call that fails with the error shown below: the whole column is
# substituted for the single `?` placeholder as one 'c(...)' string, which the
# server cannot convert to a double.
dbSendQuery(db, "INSERT INTO mtcars (v1) VALUES (?)", bind.data=v1)

运行后得到如下错误信息:

Error in .local(conn, statement, ...) : 
  Unable to execute statement 'INSERT INTO mtcars (v1) VALUES ('c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.23008278...'.
Server says 'ERROR: 49.6371639267187, 61.8893752757189, 47.1238898038469, 67.2300827868216)' to type dbl failed.
' [#conversion of string 'c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.2300827868216, 58.7477826221291, 56.8628270299753, 44.924774946334, 76.6548607475909, 71.6283125018473, 60.318578948924, 55.9203492338983, 51.5221195188726, 54.3495529071034, 47.7522083345649, 32.6725635973338, 32.6725635973338, 46.18141200777, 101.787601976309, 95.5044166691297, 106.499990956694, 67.5442420521806, 48.6946861306418, 47.7522083345649, 41.7831822927443, 60.318578948924, 85.7654794430014, 81.6814089933346, 95.5044166691297, 
].
In addition: Warning message:
In if (is.na(value)) statement <- sub("?", "NULL", statement, fixed = TRUE) else if (valueClass %in%  :
  the condition has length > 1 and only the first element will be used

从这个错误我推测,可能 bind.data 不能与 MonetDBLite 一起使用?

问题:

如何将列添加到 MonetDBLite 表并使用 R 会话中的数据填充它?

4

2 回答 2

0

首先,您最后一条语句中的“INSERT”命令不正确:INSERT 会向表中追加新行,而不是为已有的行填充新列。因此您需要使用“UPDATE”语句。

话虽如此,我提出了一个解决方案,您可以直接从 R 填充您的 MonetDBLite 表:

library(MonetDBLite)
library(DBI)

data(mtcars)
db <- dbConnect(MonetDB(), embedded = "./test.db")

# Add an explicit row number so that every row can be addressed individually
# in the WHERE clause of the UPDATE statements below.
mtcars$rownbr <- seq_len(nrow(mtcars))

# Load mtcars (including rownbr) into the database.
dbWriteTable(conn = db, value = mtcars, name = "mtcars", overwrite = TRUE)

# Add the new column that will receive the values computed in R.
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")

# Values for the new column, as a plain numeric vector (one entry per row).
v1 <- mtcars$mpg * pi

# Fill the new column one row at a time.
# The literal pieces carry their own surrounding spaces: without the space in
# front of WHERE the number and the keyword run together
# ("... SET v1 = 65.97where ...") and the statement is malformed.
for (i in seq_along(v1)) {
  myquery <- paste0(
    "UPDATE mtcars SET v1 = ", v1[i],
    " WHERE rownbr = ", i, ";"
  )
  dbSendQuery(db, myquery)
}
于 2018-08-27T12:44:14.677 回答
0

在 MonetDBLite 中通过 dbBind 进行参数化 SQL 查询似乎存在问题(请参阅 https://github.com/hannesmuehleisen/MonetDBLite-R/issues/16)。下面的代码适用于 SQLite:

library(DBI)
library(RSQLite)
data(mtcars)
db <- dbConnect(SQLite(), ":memory:")
# Load mtcars into the database; row.names = TRUE stores the car names in a
# "row_names" column that the UPDATE below uses as its key.
dbWriteTable(conn = db, value = mtcars, name = "mtcars", overwrite = TRUE, row.names = TRUE)

# Add a new column. dbExecute() runs the statement and releases its result
# right away, so no open result set is left behind on the connection.
dbExecute(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")

# Do the computation in R.
mtcars$v1 <- mtcars$mpg * pi
mtcars$row_names <- rownames(mtcars)

# Prepare a parameterized data-manipulation statement; $v1 and $row_names are
# named placeholders matched against the columns passed to dbBind().
update_query <- dbSendStatement(db, 'update mtcars set "v1"=$v1 WHERE row_names=$row_names')

dbBind(update_query, mtcars[, c("v1", "row_names")])  # send the updated data
dbClearResult(update_query)  # release the prepared statement
dbReadTable(db, "mtcars")
dbDisconnect(db)

但是,对于 MonetDBLite,它会为 dbBind 步骤产生错误(和警告):

> dbBind(update_query, mtcars[, c("v1", "row_names")])  # send the updated data
Error in vapply(params, function(x) { : values must be length 1,
 but FUN(X[[1]]) result is length 32
In addition: Warning message:
In if (is.na(x)) "NULL" else if (is.numeric(x) || is.logical(x)) { :
  the condition has length > 1 and only the first element will be used

我想出的一种解决方法是使用 glue 包中的 glue_data_sql 来“手动”拼接查询(无需在 R 中显式编写逐行循环):

library(MonetDBLite)
library(DBI)
library(glue)

data(mtcars)
db <- dbConnect(MonetDB(), embedded = "./test.db")

# Store the data frame together with its row names, then add the empty column.
dbWriteTable(db, name = "mtcars", value = mtcars, overwrite = TRUE, row.names = TRUE)
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")

# Compute the new values in R and expose the row names as a regular column so
# that both can be interpolated into the SQL template.
mtcars[["row_names"]] <- row.names(mtcars)
mtcars[["v1"]] <- mtcars[["mpg"]] * pi

# Build one UPDATE statement per row of the data frame and send each of them.
update_statements <- glue_data_sql(
  mtcars,
  "update mtcars set v1 = {v1} where row_names = {row_names};",
  .con = db
)
lapply(update_statements, function(statement) dbSendQuery(db, statement))

# Verify the result, then close the connection.
dbReadTable(db, "mtcars")
dbDisconnect(db)

另一种解决方法是在单个查询中完成 insert(更接近 @Zelazny7 的原始尝试):

library(MonetDBLite)
library(DBI)
library(glue)

data(mtcars)
db <- dbConnect(MonetDB(), embedded = "./test.db")

# CREATE TABLE fails if ./test.db already contains a table called mtcars
# (for example from an earlier run), so drop any existing one first.
if (dbExistsTable(db, "mtcars")) {
  dbRemoveTable(db, "mtcars")
}

dbSendQuery(db, "CREATE TABLE mtcars (
                 row_names VARCHAR(32),
                 v1 DOUBLE);")

mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi

# Build one "(row_name, value)" tuple per row. glue_data_sql() quotes the
# strings through the connection, so names containing quote characters do not
# break the statement (hand-written '{row_names}' quoting would).
insert_values <- glue_data_sql(mtcars, "({row_names}, {v1})", .con = db)

# Join all tuples into a single VALUES list and insert them with one query.
insert_values <- glue_collapse(insert_values, sep = ", ")
insert_query <- glue("INSERT INTO mtcars (row_names, v1) VALUES {insert_values}")
dbSendQuery(db, insert_query)

dbReadTable(db, "mtcars")
dbDisconnect(db)
于 2018-08-27T22:30:15.273 回答