-1

我的 R 代码遇到了问题。我想根据已有的数据框创建一个新的数据框:把每个值乘以 1000,再加上它在相同值中出现的序号(1、2、3……),使每个重复值都得到唯一的编号。我的数据框中的值范围从 3869014 到 4524673,每个数字会重复出现多次(最多 100 次)。例如:[3869014, 3869014, 3869014, 3869014, 3869014, 3869014, 3869014, 3869014, 3869014, 3869015, 3869015, 3869015, 3869015, ...]。我想要的结果是:[3869014001, 3869014002, 3869014003, 3869014004, 3869014005, 3869014006, 3869014007, 3869014008, 3869015001, 3869015002, 3869015003, 3869015004, 3869016001, 3869016002, 3869016003, 3869016004, 等等...]

我尝试了以下代码,但它只是把每个数字乘以 1000 再加 1,完全没有考虑重复项;而且它每次加的都是 1,而不是递增的计数(例如:1、2、3、4 等)。所以输出是 [3869014001, 3869014001, 3869014001, 3869014001, 等等...],这不是我想要的。我对在 R 数据框中使用循环还不太熟悉。谢谢您的帮助。

# Point the session at the project folder. The path is specific to the
# author's machine, so the script will not run elsewhere without editing it.
setwd("F:/TimData/SPAM/Ethiopia")
# Clear all variables: removes every object that ls() lists in the current
# environment before the rest of the script runs.
rm(list=ls())

#install packages
install.packages(c("spatstat","maptools","lattice","sp","RColorBrewer","splancs","maps", "plyr"))
install.packages(c("rgdal","raster","R.utils","spsurvey", "xlsx", "rJava", "foreign"),dep=TRUE)

#load libraries
library(spatstat); library(maptools); library(lattice); library(sp); 
library(RColorBrewer); library(splancs); library(maps)
library(rgdal); library(raster); library(R.utils); library(spsurvey); library(foreign);
library(rJava)
library(xlsx)
library(plyr)

# Create a custom 1 km spatial grid from the raster file
# "EthiopiaBuffer1km.tif" (read relative to the working directory).

kmgrid = readGDAL("EthiopiaBuffer1km.tif")

# Convert the raster object to a data frame.
# NOTE(review): na.rm=TRUE presumably drops cells without a value -- confirm
# against the as.data.frame method for the class readGDAL returns.
kmgridx= as.data.frame(kmgrid, row.names=NULL, optional=FALSE, xy=FALSE, na.rm=TRUE)

# Pull out the column holding the raster values; `x` is the vector of ids
# that the loop further down operates on.
x=kmgridx$band1

# Bounds for the while loop's counter, hard-coded from the raster's min/max.
# NOTE(review): the question text gives the maximum as 4524673, while
# `finish` here is 4525673 -- confirm which one is correct.
start = 3869014
finish = 4525673

# Attempt to multiply each duplicate by 1000 and add a running count.
# This does NOT work, for the following reasons:
#   * `start` only counts iterations from 3869014 up to `finish`; it is never
#     compared with the values in `x`, so duplicates are never identified.
#   * `if (start)` tests a non-zero number, which is always TRUE.
#   * The for loop never uses `i`; every pass recomputes the whole vector
#     `x*1000 + 1`, so `y` ends up as each value times 1000 plus the constant 1.
# A per-value running count is needed instead of the constant 1, e.g.
# ave(x, x, FUN = seq_along).

while (start < finish) {
    # Always TRUE here, because `start` is never zero.
    if (start) {
        # Vectorised assignment repeated length(x) times with the same result.
        for (i in 1:length(x)) {y=(x*1000)+1} 
        start=start +1 }
    }
4

3 回答 3

1

使用dplyr

library(dplyr)
set.seed(1)

# Toy data: three ids with five rows each, plus a random value column.
df <- data.frame(id = rep(c(1, 2, 3), each = 5), y = rnorm(15))

# Within each id, number = id * 1000 + the row's position in its group
# (1, 2, 3, ...). The grouped result is printed, not stored.
mutate(group_by(df, id), number = id * 1000 + seq_len(n()))

你得到:

#Source: local data frame [15 x 3]
#Groups: id
#
#   id          y number
#1   1 -0.6264538   1001
#2   1  0.1836433   1002
#3   1 -0.8356286   1003
#4   1  1.5952808   1004
#5   1  0.3295078   1005
#6   2 -0.8204684   2001
#7   2  0.4874291   2002
#8   2  0.7383247   2003
#9   2  0.5757814   2004
#10  2 -0.3053884   2005
#11  3  1.5117812   3001
#12  3  0.3898432   3002
#13  3 -0.6212406   3003
#14  3 -2.2146999   3004
#15  3  1.1249309   3005
于 2015-03-02T21:40:12.017 回答
1

这是一个使用 tapply 的版本:

# Give every repeated value a unique id of the form value * 1000 + k, where k
# is the running count (1, 2, 3, ...) within that value,
# e.g. 3869014 -> 3869014001, 3869014002, ...
#
# The values are kept numeric on purpose: pasting "1001" after the value
# would produce the 11-digit string "38690141001", not 3869014001.
#
# Assumes each value occurs fewer than 1000 times, otherwise ids of
# neighbouring values would collide. tapply() returns its results group by
# group in sorted order of the value, so `aa` lines up element-for-element
# with `a` only when `a` is already sorted, as it is here.
a <- c(
  3869014, 3869014, 3869014, 3869014, 3869014, 3869014, 3869014, 3869014,
  3869015, 3869015, 3869015, 3869015,
  3869016, 3869016, 3869016, 3869016
)
aa <- unname(unlist(tapply(a, a, function(x) x * 1000 + seq_along(x))))
aa
#  [1] 3869014001 3869014002 3869014003 3869014004 3869014005 3869014006
#  [7] 3869014007 3869014008 3869015001 3869015002 3869015003 3869015004
# [13] 3869016001 3869016002 3869016003 3869016004
于 2015-03-02T21:07:14.583 回答
1

这可能是您正在寻找的。

# Sample data: ids 1, 2, 3 repeated five times each, with a random y column.
id <- rep(c(1, 2, 3), each = 5)
y <- rnorm(15)
df <- data.frame(id, y)
# Turn a run of ids into unique numbers: each element times 1000 plus its
# position within the vector (1, 2, 3, ...). Intended to be applied to one
# group of identical ids at a time.
seq_along_mult <- function(x) {
  x * 1000 + seq_along(x)
}

# Apply seq_along_mult within each id group; ave() puts the results back in
# the original row order.
df[["number"]] <- ave(df$id, df$id, FUN = seq_along_mult)

    id         y  number
1   1  0.1872768   1001
2   1  1.9137194   1002
3   1 -0.6226594   1003
4   1 -1.0641839   1004
5   1 -0.3422707   1005
6   2 -0.1013222   2001
7   2  0.5783932   2002
8   2  0.8276480   2003
9   2  1.3111752   2004
10  2  0.1783597   2005
11  3  1.7036697   3001
12  3 -0.5759164   3002
13  3 -0.7028795   3003
14  3 -0.2590082   3004
15  3  1.9239665   3005
于 2015-03-02T20:51:33.823 回答