3

修改数据框(data.frame)中的某一个元素时,R 似乎会复制整个数据框。我想知道是否有办法让 R 在保持“修改时复制”(copy-on-modify)策略的前提下,只复制相应的数据列(例如下面某个具体的 INTSXP,而不是整个 VECSXP)?另外,有没有办法对数据框进行就地修改?

> x<-data.frame(x=1:1000000,y=1:1000000)
> .Internal(inspect(x))
@62cd2b0 19 VECSXP g0c2 [OBJ,MARK,NAM(2),ATT] (len=2, tl=0)
  @f80d0e0 13 INTSXP g0c7 [MARK] (len=1000000, tl=0) 1,2,3,4,5,...
  @8ed6970 13 INTSXP g0c7 [] (len=1000000, tl=0) 1,2,3,4,5,...
ATTRIB:
  @68f6b40 02 LISTSXP g0c0 []
    TAG: @4e58868 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "names" (has value)
    @613efd0 16 STRSXP g0c2 [] (len=2, tl=0)
      @4e93038 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "x"
      @4fe8bd8 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "y"
    TAG: @4e62650 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "row.names" (has value)
    @113bb328 13 INTSXP g0c1 [] (len=2, tl=0) -2147483648,-1000000
    TAG: @4e58d38 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "class" (has value)
    @113aa1d8 16 STRSXP g0c1 [MARK,NAM(2)] (len=1, tl=0)
      @4ee78a0 09 CHARSXP g1c2 [MARK,gp=0x61] [ASCII] [cached] "data.frame"
> x[1,1]<-3L
>  .Internal(inspect(x))
@68eb9f8 19 VECSXP g0c2 [OBJ,NAM(2),ATT] (len=2, tl=0)
  @6507290 13 INTSXP g0c7 [] (len=1000000, tl=0) 3,2,3,4,5,...
  @7422920 13 INTSXP g0c7 [] (len=1000000, tl=0) 1,2,3,4,5,...
ATTRIB:
  @68ef738 02 LISTSXP g0c0 []
    TAG: @4e58868 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "names" (has value)
    @68ebaa0 16 STRSXP g0c2 [NAM(2)] (len=2, tl=0)
      @4e93038 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "x"
      @4fe8bd8 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "y"
    TAG: @4e62650 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "row.names" (has value)
    @f43c418 13 INTSXP g0c1 [] (len=2, tl=0) -2147483648,-1000000
    TAG: @4e58d38 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "class" (has value)
    @f43c4d8 16 STRSXP g0c1 [NAM(1)] (len=1, tl=0)
      @4ee78a0 09 CHARSXP g1c2 [MARK,gp=0x61] [ASCII] [cached] "data.frame"

谢谢!

4

2 回答 2

7

您应该为此使用 data.table,它正是为此目的而设计的。另请阅读这篇参考文章。

R) dt<-data.table(x=1:10,y=1:10)
R) .Internal(inspect(dt))
@0x000000000dce56c0 19 VECSXP g0c7 [OBJ,NAM(1),ATT] (len=2, tl=100)
  @0x000000000ebc4100 13 INTSXP g0c4 [NAM(2)] (len=10, tl=0) 1,2,3,4,5,...
  @0x000000000ebc41b0 13 INTSXP g0c4 [NAM(2)] (len=10, tl=0) 1,2,3,4,5,...
ATTRIB:
  @0x000000000e6c2d00 02 LISTSXP g0c0 [] 
    TAG: @0x00000000003b0088 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "names" (has value)
    @0x000000000cc99fd0 16 STRSXP g0c7 [NAM(2)] (len=2, tl=100)
      @0x00000000003ddbb8 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "x"
      @0x000000000734f4d8 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "y"
    TAG: @0x00000000003b1d98 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "row.names" (has value)
    @0x0000000014487f98 13 INTSXP g0c1 [] (len=2, tl=0) -2147483648,-10
    TAG: @0x00000000003b0558 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "class" (has value)
    @0x000000000ead1910 16 STRSXP g0c2 [] (len=2, tl=0)
      @0x000000000753f440 09 CHARSXP g1c2 [MARK,gp=0x61] [ASCII] [cached] "data.table"
      @0x000000000715f398 09 CHARSXP g1c2 [MARK,gp=0x61,ATT] [ASCII] [cached] "data.frame"
    TAG: @0x000000000c3d7cc0 01 SYMSXP g1c0 [MARK] ".internal.selfref"
    @0x000000000e6c1e80 22 EXTPTRSXP g0c0 [] 
R) dt[,y:=y+1]
R) .Internal(inspect(dt))
@0x000000000dce56c0 19 VECSXP g0c7 [OBJ,NAM(2),ATT] (len=2, tl=100)
  @0x000000000ebc4100 13 INTSXP g0c4 [NAM(2)] (len=10, tl=0) 1,2,3,4,5,...
  @0x000000000ebc6728 14 REALSXP g0c6 [NAM(1)] (len=10, tl=0) 2,3,4,5,6,...
ATTRIB:
  @0x000000000e6c2d00 02 LISTSXP g0c0 [] 
    TAG: @0x00000000003b0088 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "names" (has value)
    @0x000000000cc99fd0 16 STRSXP g0c7 [NAM(2)] (len=2, tl=100)
      @0x00000000003ddbb8 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "x"
      @0x000000000734f4d8 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "y"
    TAG: @0x00000000003b1d98 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "row.names" (has value)
    @0x0000000014487f98 13 INTSXP g0c1 [] (len=2, tl=0) -2147483648,-10
    TAG: @0x00000000003b0558 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "class" (has value)
    @0x000000000ead1910 16 STRSXP g0c2 [NAM(2)] (len=2, tl=0)
      @0x000000000753f440 09 CHARSXP g1c2 [MARK,gp=0x61] [ASCII] [cached] "data.table"
      @0x000000000715f398 09 CHARSXP g1c2 [MARK,gp=0x61,ATT] [ASCII] [cached] "data.frame"
    TAG: @0x000000000c3d7cc0 01 SYMSXP g1c0 [MARK] ".internal.selfref"
    @0x000000000e6c1e80 22 EXTPTRSXP g0c0 [] 
于 2013-08-22T15:22:05.320 回答
5

@stat_quant 说得对,data.table 才是正确的选择。

但是 data.table 还提供了一个函数 set,它同样可以就地(按引用)修改 data.frame。

用一个更小的例子

x <- data.frame(x = 1:10, y = 1:10)
# @0x00000000120dbca8 19 VECSXP g0c2 [OBJ,NAM(2),ATT] (len=2, tl=0)
#   @0x0000000011631328 13 INTSXP g0c4 [] (len=10, tl=0) 1,2,3,4,5,...
#   @0x0000000011631380 13 INTSXP g0c4 [] (len=10, tl=0) 1,2,3,4,5,...
# ATTRIB:
#   @0x0000000020964420 02 LISTSXP g0c0 [] 
#   TAG: @0x0000000000330088 01 SYMSXP g1c0 [MARK,NAM(2),LCK,gp=0x4000] "names" (has value)
#   @0x00000000120dafe8 16 STRSXP g0c2 [NAM(1)] (len=2, tl=0)
#     @0x000000000037dc60 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "x"
#     @0x0000000008dc35c0 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "y"
#   TAG: @0x0000000000331d98 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "row.names" (has value)
#   @0x000000001165e700 13 INTSXP g0c1 [] (len=2, tl=0) -2147483648,-10
#   TAG: @0x0000000000330558 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "class" (has value)
#   @0x000000001165e730 16 STRSXP g0c1 [NAM(2)] (len=1, tl=0)
#     @0x00000000003fc4a0 09 CHARSXP g1c2 [MARK,gp=0x61,ATT] [ASCII] [cached] "data.frame"
.Internal(inspect(x))
set(x,i=1L,j=1L,value = 3L)
.Internal(inspect(x))
# @0x00000000120dbca8 19 VECSXP g0c2 [OBJ,NAM(2),ATT] (len=2, tl=0)
#   @0x0000000011631328 13 INTSXP g0c4 [] (len=10, tl=0) 3,2,3,4,5,...
#   @0x0000000011631380 13 INTSXP g0c4 [] (len=10, tl=0) 1,2,3,4,5,...
# ATTRIB:
#   @0x0000000020964420 02 LISTSXP g0c0 [] 
#   TAG: @0x0000000000330088 01 SYMSXP g1c0 [MARK,NAM(2),LCK,gp=0x4000] "names" (has value)
#   @0x00000000120dafe8 16 STRSXP g0c2 [NAM(2)] (len=2, tl=0)
#     @0x000000000037dc60 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "x"
#     @0x0000000008dc35c0 09 CHARSXP g1c1 [MARK,gp=0x61] [ASCII] [cached] "y"
#   TAG: @0x0000000000331d98 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "row.names" (has value)
#   @0x000000001165e700 13 INTSXP g0c1 [] (len=2, tl=0) -2147483648,-10
#   TAG: @0x0000000000330558 01 SYMSXP g1c0 [MARK,LCK,gp=0x4000] "class" (has value)
#   @0x000000001165e730 16 STRSXP g0c1 [NAM(2)] (len=1, tl=0)
#     @0x00000000003fc4a0 09 CHARSXP g1c2 [MARK,gp=0x61,ATT] [ASCII] [cached] "data.frame"
head(x)
#   x y
# 1 3 1
# 2 2 2
# 3 3 3
# 4 4 4
# 5 5 5
# 6 6 6

使用 set 可以更改 data.frame 现有列中的值,但不能添加新列(而对 data.table 则可以使用 := 添加列)。

于 2013-08-23T01:26:36.857 回答