2

我有一个 50GB 的 SQLite 数据库文件,想在其中计算并添加新变量。能否改造 Moody_Mudskipper 的函数(见下),或者使用 ALTER TABLE 和 UPDATE 之类的语句,只创建新变量(新列),而不是重新创建整张表?

library(dbplyr)
library(DBI)
# dplyr supplies tbl(), mutate(), copy_to() and %>% used further down.
library(dplyr)

# RSQLite's database argument is `dbname`; an unknown `path` argument is
# swallowed by `...` and the connection falls back to the default dbname.
con <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")

# Seed the database with the first three rows of iris as table "iris".
copy_to(con, head(iris, 3), "iris")

#' Materialise a lazy dbplyr query as a new table in its own database.
#'
#' Renders the SQL behind `data` and runs
#' `CREATE TABLE <name> AS <rendered query>` on the connection stored in
#' `data$src$con`.
#'
#' @param data A lazy table (e.g. the result of `tbl(con, ...) %>% mutate()`).
#' @param name Name of the table to create. It is pasted into the statement
#'   as-is, so it must already be a valid (quoted if necessary) SQL name.
#' @return Invisibly, the value returned by `DBI::dbExecute()`.
create <- function(data, name) {
  statement <- paste(
    "CREATE TABLE", name,
    "AS", dbplyr::sql_render(data)
  )
  # CREATE TABLE returns no rows. dbExecute() runs the statement and frees
  # the result in one step, whereas dbSendQuery() leaves a result set open
  # that the caller would have to clear with dbClearResult().
  invisible(DBI::dbExecute(data$src$con, statement))
}

# Add Sepal.Area to the lazy "iris" query and store the result as "iris_2".
create(
  mutate(tbl(con, "iris"), Sepal.Area = Sepal.Length * Sepal.Width),
  "iris_2"
)
4

1 回答 1

4

这是一个使用 pool 和 DBI 的简单解决方案,因为您可以直接编写和执行任何有效的 SQL 语句。

library(DBI)
library(dplyr)
library(pool)
library(RSQLite)

# Database
# Create a connection pool; dbname = "" is passed straight to RSQLite.
pool <- dbPool(drv = RSQLite::SQLite(), dbname = "")
# Close the pool only when it is no longer needed, to prevent leaks.
# Closing it here would invalidate `pool` before any of the calls below run,
# so run this as the very last step of the script instead:
# poolClose(pool)


# Load the built-in mtcars data frame into the database as table "mtcars".
dbWriteTable(conn = pool, name = "mtcars", value = mtcars)

#' Add a new, empty column to an existing table.
#'
#' Runs `ALTER TABLE <table> ADD COLUMN <column> <type>;` through the
#' file-level `pool` object.
#'
#' @param table_name Name of the table to alter (single string).
#' @param column_name Name of the column to add (single string).
#' @param column_type SQL type of the new column, e.g. "REAL", "INTEGER",
#'   "TEXT". Must be a plain type name, optionally with a size such as
#'   "VARCHAR(20)".
#' @return The value returned by `dbExecute()`.
insert_new_column <- function(table_name, column_name, column_type) {
  # A column type is SQL syntax, not a value, so it cannot be escaped like
  # data. Accept only a plain type name before splicing it in verbatim.
  stopifnot(
    is.character(column_type),
    length(column_type) == 1L,
    grepl("^[A-Za-z][A-Za-z0-9_ ]*(\\([0-9, ]+\\))?$", column_type)
  )

  # Table and column are identifiers. Plain strings handed to
  # sqlInterpolate() are escaped as string *literals*, so quote them as
  # identifiers first; already-quoted SQL is inserted unchanged.
  query <- sqlInterpolate(
    pool,
    "ALTER TABLE ?table ADD COLUMN ?column ?type;",
    table = dbQuoteIdentifier(pool, table_name),
    column = dbQuoteIdentifier(pool, column_name),
    type = SQL(column_type)
  )

  dbExecute(pool, query)
}

# Add an empty REAL column called "test" (other types: INTEGER, TEXT).
insert_new_column("mtcars", "test", "REAL")

# Read the altered table back with a plain SQL query ...
dbGetQuery(conn = pool, statement = "Select * from mtcars;")

# ... or via the table reader, showing only the first rows.
head(dbReadTable(conn = pool, name = "mtcars"))
   mpg cyl disp  hp drat    wt  qsec vs am gear carb test
1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4  NA
2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4  NA
3 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1  NA
4 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1  NA
5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2  NA
6 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1  NA

然后你可以这样做:

# Compute `test` as cyl * 2 in a lazy query over the "mtcars" table.
# This builds and prints a query result; nothing is assigned or written here.
mutate(tbl(pool, "mtcars"), test = cyl * 2)

更新

# Open an in-memory SQLite database for the transactional variant below.
con <- dbConnect(drv = RSQLite::SQLite(), dbname = ":memory:")

# Load the built-in mtcars data frame as table "mtcars".
dbWriteTable(conn = con, name = "mtcars", value = mtcars)
#' Add a column and fill it in a single transaction.
#'
#' Runs `ALTER TABLE ... ADD COLUMN ...` followed by
#' `UPDATE ... SET <column> = <value_sql>` on the file-level `con`
#' connection, wrapped in `dbWithTransaction()` so both statements are
#' committed together or rolled back together.
#'
#' @param table_name Name of the table to alter (single string).
#' @param column_name Name of the column to add and fill (single string).
#' @param column_type SQL type of the new column, e.g. "REAL", "INTEGER",
#'   "TEXT". Must be a plain type name, optionally with a size.
#' @param value_sql SQL expression assigned to the new column. It is inserted
#'   verbatim, so it must come from trusted code, never from user input.
#'   Defaults to "cyl*2", the expression that was previously hard-coded.
#' @return The value of the last `dbExecute()` call (the UPDATE).
insert_new_column2 <- function(table_name, column_name, column_type,
                               value_sql = "cyl*2") {
  # The type and the expression are SQL syntax rather than values; the type
  # is restricted to a plain type name, the expression is trusted as-is.
  stopifnot(
    is.character(column_type),
    length(column_type) == 1L,
    grepl("^[A-Za-z][A-Za-z0-9_ ]*(\\([0-9, ]+\\))?$", column_type),
    is.character(value_sql),
    length(value_sql) == 1L
  )

  # Quote names as identifiers; plain strings given to sqlInterpolate()
  # would be escaped as string literals instead.
  table_id <- dbQuoteIdentifier(con, table_name)
  column_id <- dbQuoteIdentifier(con, column_name)

  dbWithTransaction(
    con, {
      dbExecute(con, sqlInterpolate(
        con,
        "ALTER TABLE ?table ADD COLUMN ?column ?type;",
        table = table_id, column = column_id, type = SQL(column_type)
      ))

      dbExecute(con, sqlInterpolate(
        con,
        "UPDATE ?table SET ?column = ?value;",
        table = table_id, column = column_id, value = SQL(value_sql)
      ))
    }
  )
}
于 2018-06-17T03:45:23.380 回答