3

我有两个数据框。一个包含间隔不均匀的每日计数(名为 y),另一个包含间隔均匀的每周数据(名为 gIm;其中有两个表示日期的变量:weekStart 和 weekEnd)。我想对每一周统计落在 weekStart 与 weekEnd 之间的所有每日观测的计数之和,并把这个新的计数向量附加到我的每周数据框中。

# Sort the daily observations by date.
y <- y[order(as.Date(y$date, format = "%Y/%m/%d")), ]

# Week boundaries, taken row by row from gIm so that position i refers to the
# same week in `start`, in `end` and in gIm itself.
start <- gIm$weekStart
end <- gIm$weekEnd

# For each week (one row of gIm), total the daily counts whose date lies in
# [weekStart, weekEnd], both ends inclusive. The result is indexed by week,
# not by daily observation; a week with no observations gets 0.
gIm$count <- vapply(
  seq_len(nrow(gIm)),
  function(i) {
    in_week <- y$date >= start[i] & y$date <= end[i]
    sum(y$count[in_week], na.rm = TRUE)
  },
  numeric(1)
)

这是我间隔不均匀的每日数据(抱歉篇幅较长):

structure(list(date = structure(c(12437, 12478, 12486, 12487, 
12493, 12494, 12495, 12500, 12502, 12506, 12900, 12955, 12962, 
12964, 12977, 12982, 12983, 12985, 12991, 12992, 12993, 13032, 
13033, 13034, 13041, 13046, 13048, 13053, 13055, 13063, 13073, 
13074, 13075, 13082, 13083, 13084, 13094, 13096, 13097, 13101, 
13103, 13104, 13105, 13123, 13124, 13125, 13130, 13133, 13209, 
13214, 13235, 13242, 13244, 13263, 13272, 13277, 13285, 13291, 
13293, 13305, 13306, 13311, 13312, 13314, 13320, 13328, 13339, 
13342, 13346, 13354, 13356, 13357, 13405, 13406, 13410, 13419, 
13420, 13489, 13517, 13518, 13522, 13523, 13525, 13530, 13531, 
13535, 13542, 13543, 13544, 13550, 13551, 13552, 13559, 13560, 
13572, 13573, 13577, 13578, 13579, 13580, 13581, 13585, 13587, 
13592, 13593, 13594, 13600, 13601, 13620, 13621, 13622, 13626, 
13641, 13643, 13647, 13650, 13654, 13657, 13686, 13692, 13704, 
13711, 13717, 13718, 13720, 13726, 14569, 14629, 14630, 14637, 
14642, 14644, 14664, 14672, 14677, 14683, 14713, 14727, 14736, 
14272, 14782, 14789, 14805, 14816, 14825, 14866, 14874, 14880, 
14881, 14930, 14943, 14287, 14314, 14329, 14336, 14250, 14357, 
14362, 14369, 14370), class = "Date"), count = c(1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 1L, 
1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 1L, 3L, 1L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 
1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 12L, 2L, 1L, 1L, 
1L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 3L, 
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 2L, 1L, 3L, 1L, 2L, 2L, 
2L, 1L, 3L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 4L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L)), .Names = c("date", 
"count"), row.names = c(NA, -160L), class = "data.frame")

这是我间隔均匀的每周数据(抱歉篇幅较长):

structure(list(immigration = c(62L, 53L, 47L, 47L, 46L, 46L, 
47L, 49L, 49L, 43L, 47L, 41L, 46L, 44L, 41L, 45L, 52L, 49L, 47L, 
41L, 41L, 37L, 37L, 36L, 37L, 36L, 37L, 38L, 36L, 34L, 33L, 34L, 
32L, 35L, 34L, 38L, 40L, 43L, 43L, 42L, 42L, 41L, 42L, 48L, 46L, 
47L, 40L, 48L, 44L, 42L, 30L, 32L, 41L, 37L, 37L, 39L, 39L, 43L, 
39L, 39L, 42L, 41L, 41L, 37L, 39L, 37L, 40L, 40L, 41L, 41L, 41L, 
39L, 38L, 35L, 36L, 33L, 31L, 33L, 32L, 32L, 33L, 32L, 31L, 31L, 
33L, 33L, 29L, 32L, 38L, 37L, 36L, 38L, 39L, 41L, 39L, 38L, 39L, 
38L, 31L, 42L, 39L, 37L, 30L, 27L, 33L, 36L, 33L, 35L, 36L, 36L, 
35L, 34L, 39L, 42L, 41L, 44L, 93L, 83L, 91L, 70L, 81L, 100L, 
64L, 78L, 72L, 54L, 48L, 40L, 36L, 33L, 33L, 34L, 34L, 34L, 31L, 
31L, 33L, 32L, 31L, 33L, 38L, 38L, 41L, 40L, 39L, 41L, 41L, 43L, 
43L, 45L, 35L, 43L, 41L, 39L, 29L, 26L, 32L, 38L, 34L, 39L, 39L, 
39L, 39L, 39L, 39L, 42L, 42L, 43L, 42L, 43L, 44L, 41L, 43L, 52L, 
45L, 63L, 64L, 53L, 60L, 57L, 51L, 65L, 44L, 39L, 41L, 38L, 31L, 
30L, 29L, 30L, 31L, 31L, 33L, 35L, 36L, 36L, 37L, 36L, 36L, 38L, 
38L, 39L, 31L, 40L, 39L, 36L, 29L, 21L, 27L, 35L, 33L, 32L, 34L, 
36L, 35L, 32L, 35L, 33L, 34L, 31L, 31L, 33L, 34L, 34L, 33L, 33L, 
32L, 31L, 29L, 25L, 27L, 24L, 24L, 23L, 22L, 23L, 23L, 23L, 22L, 
22L, 21L, 21L, 24L, 23L, 27L, 28L, 29L, 29L, 29L, 30L, 31L, 31L, 
30L, 30L, 30L, 23L, 29L, 27L, 23L, 16L, 17L, 24L, 26L, 26L, 27L, 
28L, 29L, 27L, 29L, 29L, 29L, 28L, 29L, 29L, 29L, 30L, 30L, 29L, 
29L, 28L, 25L, 25L, 25L, 25L, 24L, 24L, 23L, 23L, 23L, 22L, 23L, 
22L, 22L, 21L, 22L, 22L, 23L, 25L, 25L, 26L, 27L, 26L, 27L, 26L, 
27L, 26L, 28L, 21L, 26L, 25L, 24L, 18L, 17L, 24L, 26L, 25L, 25L, 
25L, 24L, 24L, 25L, 26L, 28L, 27L, 32L, 26L, 27L, 29L, 40L, 87L, 
65L, 49L, 57L, 40L, 33L, 30L, 28L, 28L, 29L, 30L, 29L, 26L, 36L, 
26L, 23L, 21L, 21L, 23L, 22L, 24L, 27L, 25L, 26L, 24L, 25L, 26L, 
27L, 24L, 27L, 19L, 24L, 25L, 21L, 15L, 14L), weekStart = structure(c(12421, 
12428, 12435, 12442, 12449, 12456, 12463, 12470, 12477, 12484, 
12491, 12498, 12505, 12512, 12519, 12526, 12533, 12540, 12547, 
12554, 12561, 12568, 12575, 12582, 12589, 12596, 12603, 12610, 
12617, 12624, 12631, 12638, 12645, 12652, 12659, 12666, 12673, 
12680, 12687, 12694, 12701, 12708, 12715, 12722, 12729, 12736, 
12743, 12750, 12757, 12764, 12771, 12778, 12785, 12792, 12799, 
12806, 12813, 12820, 12827, 12834, 12841, 12848, 12855, 12862, 
12869, 12876, 12883, 12890, 12897, 12904, 12911, 12918, 12925, 
12932, 12939, 12946, 12953, 12960, 12967, 12974, 12981, 12988, 
12995, 13002, 13009, 13016, 13023, 13030, 13037, 13044, 13051, 
13058, 13065, 13072, 13079, 13086, 13093, 13100, 13107, 13114, 
13121, 13128, 13135, 13142, 13149, 13156, 13163, 13170, 13177, 
13184, 13191, 13198, 13205, 13212, 13219, 13226, 13233, 13240, 
13247, 13254, 13261, 13268, 13275, 13282, 13289, 13296, 13303, 
13310, 13317, 13324, 13331, 13338, 13345, 13352, 13359, 13366, 
13373, 13380, 13387, 13394, 13401, 13408, 13415, 13422, 13429, 
13436, 13443, 13450, 13457, 13464, 13471, 13478, 13485, 13492, 
13499, 13506, 13513, 13520, 13527, 13534, 13541, 13548, 13555, 
13562, 13569, 13576, 13583, 13590, 13597, 13604, 13611, 13618, 
13625, 13632, 13639, 13646, 13653, 13660, 13667, 13674, 13681, 
13688, 13695, 13702, 13709, 13716, 13723, 13730, 13737, 13744, 
13751, 13758, 13765, 13772, 13779, 13786, 13793, 13800, 13807, 
13814, 13821, 13828, 13835, 13842, 13849, 13856, 13863, 13870, 
13877, 13884, 13891, 13898, 13905, 13912, 13919, 13926, 13933, 
13940, 13947, 13954, 13961, 13968, 13975, 13982, 13989, 13996, 
14003, 14010, 14017, 14024, 14031, 14038, 14045, 14052, 14059, 
14066, 14073, 14080, 14087, 14094, 14101, 14108, 14115, 14122, 
14129, 14136, 14143, 14150, 14157, 14164, 14171, 14178, 14185, 
14192, 14199, 14206, 14213, 14220, 14227, 14234, 14241, 14248, 
14255, 14262, 14269, 14276, 14283, 14290, 14297, 14304, 14311, 
14318, 14325, 14332, 14339, 14346, 14353, 14360, 14367, 14374, 
14381, 14388, 14395, 14402, 14409, 14416, 14423, 14430, 14437, 
14444, 14451, 14458, 14465, 14472, 14479, 14486, 14493, 14500, 
14507, 14514, 14521, 14528, 14535, 14542, 14549, 14556, 14563, 
14570, 14577, 14584, 14591, 14598, 14605, 14612, 14619, 14626, 
14633, 14640, 14647, 14654, 14661, 14668, 14675, 14682, 14689, 
14696, 14703, 14710, 14717, 14724, 14731, 14738, 14745, 14752, 
14759, 14766, 14773, 14780, 14787, 14794, 14801, 14808, 14815, 
14822, 14829, 14836, 14843, 14850, 14857, 14864, 14871, 14878, 
14885, 14892, 14899, 14906, 14913, 14920, 14927, 14934, 14941, 
14948, 14955, 14962, 14969), class = "Date"), weekEnd = structure(c(12427, 
12434, 12441, 12448, 12455, 12462, 12469, 12476, 12483, 12490, 
12497, 12504, 12511, 12518, 12525, 12532, 12539, 12546, 12553, 
12560, 12567, 12574, 12581, 12588, 12595, 12602, 12609, 12616, 
12623, 12630, 12637, 12644, 12651, 12658, 12665, 12672, 12679, 
12686, 12693, 12700, 12707, 12714, 12721, 12728, 12735, 12742, 
12749, 12756, 12763, 12770, 12777, 12784, 12791, 12798, 12805, 
12812, 12819, 12826, 12833, 12840, 12847, 12854, 12861, 12868, 
12875, 12882, 12889, 12896, 12903, 12910, 12917, 12924, 12931, 
12938, 12945, 12952, 12959, 12966, 12973, 12980, 12987, 12994, 
13001, 13008, 13015, 13022, 13029, 13036, 13043, 13050, 13057, 
13064, 13071, 13078, 13085, 13092, 13099, 13106, 13113, 13120, 
13127, 13134, 13141, 13148, 13155, 13162, 13169, 13176, 13183, 
13190, 13197, 13204, 13211, 13218, 13225, 13232, 13239, 13246, 
13253, 13260, 13267, 13274, 13281, 13288, 13295, 13302, 13309, 
13316, 13323, 13330, 13337, 13344, 13351, 13358, 13365, 13372, 
13379, 13386, 13393, 13400, 13407, 13414, 13421, 13428, 13435, 
13442, 13449, 13456, 13463, 13470, 13477, 13484, 13491, 13498, 
13505, 13512, 13519, 13526, 13533, 13540, 13547, 13554, 13561, 
13568, 13575, 13582, 13589, 13596, 13603, 13610, 13617, 13624, 
13631, 13638, 13645, 13652, 13659, 13666, 13673, 13680, 13687, 
13694, 13701, 13708, 13715, 13722, 13729, 13736, 13743, 13750, 
13757, 13764, 13771, 13778, 13785, 13792, 13799, 13806, 13813, 
13820, 13827, 13834, 13841, 13848, 13855, 13862, 13869, 13876, 
13883, 13890, 13897, 13904, 13911, 13918, 13925, 13932, 13939, 
13946, 13953, 13960, 13967, 13974, 13981, 13988, 13995, 14002, 
14009, 14016, 14023, 14030, 14037, 14044, 14051, 14058, 14065, 
14072, 14079, 14086, 14093, 14100, 14107, 14114, 14121, 14128, 
14135, 14142, 14149, 14156, 14163, 14170, 14177, 14184, 14191, 
14198, 14205, 14212, 14219, 14226, 14233, 14240, 14247, 14254, 
14261, 14268, 14275, 14282, 14289, 14296, 14303, 14310, 14317, 
14324, 14331, 14338, 14345, 14352, 14359, 14366, 14373, 14380, 
14387, 14394, 14401, 14408, 14415, 14422, 14429, 14436, 14443, 
14450, 14457, 14464, 14471, 14478, 14485, 14492, 14499, 14506, 
14513, 14520, 14527, 14534, 14541, 14548, 14555, 14562, 14569, 
14576, 14583, 14590, 14597, 14604, 14611, 14618, 14625, 14632, 
14639, 14646, 14653, 14660, 14667, 14674, 14681, 14688, 14695, 
14702, 14709, 14716, 14723, 14730, 14737, 14744, 14751, 14758, 
14765, 14772, 14779, 14786, 14793, 14800, 14807, 14814, 14821, 
14828, 14835, 14842, 14849, 14856, 14863, 14870, 14877, 14884, 
14891, 14898, 14905, 14912, 14919, 14926, 14933, 14940, 14947, 
14954, 14961, 14968, 14975), class = "Date")), .Names = c("immigration", 
"weekStart", "weekEnd"), class = "data.frame", row.names = c(NA, 
-365L))

谢谢你的帮助!

4

3 回答 3

3

先求出 y 中每个日期所在周的起始日:

# Step each date back by its day-of-week offset (POSIXlt wday: 0 = Sunday)
# to get the Sunday that starts its week.
y[["weekStart"]] <- y[["date"]] - as.POSIXlt(y[["date"]])$wday

然后按周起始日对计数进行聚合(结果中省略了已不再需要的 date 列):

# Sum the daily counts within each week start; the result has one row per
# distinct weekStart and only the columns weekStart and count.
yy <- aggregate(count ~ weekStart, data=y, FUN=sum)

最后,将结果与 gIm 合并:

# Left-join the weekly sums onto gIm by week start: every week in gIm is kept,
# weeks without daily observations get count = NA, and daily observations that
# fall outside all gIm weeks do not add extra rows. The key is named explicitly
# so that any other column the two frames happen to share is not used for
# matching.
m <- merge(gIm, yy, by = "weekStart", all.x = TRUE)


> head(m, 10)
    weekStart immigration    weekEnd count
1  2004-01-04          62 2004-01-10    NA
2  2004-01-11          53 2004-01-17    NA
3  2004-01-18          47 2004-01-24     1
4  2004-01-25          47 2004-01-31    NA
5  2004-02-01          46 2004-02-07    NA
6  2004-02-08          46 2004-02-14    NA
7  2004-02-15          47 2004-02-21    NA
8  2004-02-22          49 2004-02-28    NA
9  2004-02-29          49 2004-03-06     1
10 2004-03-07          43 2004-03-13     2
于 2013-03-18T02:04:24.157 回答
2

这是一个使用 data.table 的方案。
可以按相应的日期列为两个数据集设置键(key)。
然后我们可以在 j 中“即时”展开每周所包含的各个日期,再把它们连接起来。

library(data.table)
# Key the weekly table by week start and the daily table by observation date.
gdt <- data.table(gIm, key="weekStart")
ydt <- data.table(y, key="date")


# Build a lookup of the 7 dates from weekStart to weekEnd for every week
# (computed in j, grouped by weekStart), key it by date, join the daily rows
# onto it, then sum count per weekStart. Dates without a daily observation
# presumably come back with count NA, which na.rm=TRUE drops, so empty weeks
# total 0.
weeklyCounts <- 
ydt[setkey(gdt[, list("date"=seq(weekStart, weekEnd, length.out=7)), by=weekStart], "date")][
   , list(totalCounts = sum(count, na.rm=TRUE))
   , by="weekStart"]

# Join the weekly totals back onto gdt by weekStart and add them, by reference,
# as the new column totalCounts.
gdt[ setkey(weeklyCounts, weekStart), totalCounts := totalCounts]

gdt
     immigration  weekStart    weekEnd totalCounts
  1:          62 2004-01-04 2004-01-10           0
  2:          53 2004-01-11 2004-01-17           0
  3:          47 2004-01-18 2004-01-24           1
  4:          47 2004-01-25 2004-01-31           0
  5:          46 2004-02-01 2004-02-07           0
 ---                                              
361:          24 2010-11-28 2010-12-04           1
362:          25 2010-12-05 2010-12-11           0
363:          21 2010-12-12 2010-12-18           0
364:          15 2010-12-19 2010-12-25           0
365:          14 2010-12-26 2011-01-01           0
于 2013-03-18T02:17:48.267 回答
1

这是使用 data.table 和滚动连接(rolling join)的另一个方案

# Key the weekly table by week end and the daily table by observation date.
weekData <- data.table(gIm, key = 'weekEnd')
dayData <- data.table(y, key = 'date')
# Keep a copy of the key column: after the join below the weekEnd column holds
# the daily dates, so the matched week end has to be read from `we`.
weekData[, we := weekEnd]

# Rolling join: each daily date is matched to the next weekEnd, which may be
# rolled back by at most 6 days, i.e. to the week containing that date.
# The daily counts are then summed per original week end (stored in `we`).
we <- weekData[dayData, roll = -6, nomatch = NA][
  , list(count = sum(count)), keyby = list(weekEnd = we)]

# Join the sums back onto the full list of weeks, then tidy up in two separate
# steps: fill weeks that had no observations with 0, and drop the helper
# column. Deleting a column while also restricting rows with `i` is kept out
# of a single `:=` call because column deletion applies to the whole table.
weekSum <- we[weekData]
weekSum[is.na(count), count := 0L]
weekSum[, we := NULL]

head(weekSum, 10)
       weekEnd count immigration  weekStart
 1: 2004-01-10     0          62 2004-01-04
 2: 2004-01-17     0          53 2004-01-11
 3: 2004-01-24     1          47 2004-01-18
 4: 2004-01-31     0          47 2004-01-25
 5: 2004-02-07     0          46 2004-02-01
 6: 2004-02-14     0          46 2004-02-08
 7: 2004-02-21     0          47 2004-02-15
 8: 2004-02-28     0          49 2004-02-22
 9: 2004-03-06     1          49 2004-02-29
10: 2004-03-13     2          43 2004-03-07

限定滚动天数的功能是 data.table 1.8.8 新增的特性。摘自 NEWS:

  • 除了 TRUE/FALSE,“roll”现在可以是正数(前滚/LOCF)或负数(后滚/NOCB)。有限的数字限制了值滚动的距离(有限的陈旧性)。roll=TRUE 和 roll=+Inf 是等价的。

编辑——一个(也许)更直接的版本

 # Variant keyed on week start: rolling-join the daily rows onto the weekly
 # table, sum count per weekStart, and left-merge the sums back onto weekData
 # (all.x = TRUE keeps every week; weeks without a sum end up with count NA).
 # NOTE(review): with the key on weekStart, a negative roll looks like it would
 # match a date to the *following* week start, and the joined weekStart column
 # may carry the daily dates rather than week starts -- confirm on the current
 # data.table version that this reproduces the per-week sums.
 weekData <- data.table(gIm, key = 'weekStart')
 weekly <- merge(weekData,
              weekData[dayData, roll= -6][,list(count = sum(count)), by = weekStart],
              all.x = TRUE, by = 'weekStart')

 head(weekly, n = 10)



     weekStart immigration    weekEnd count
 1: 2004-01-04          62 2004-01-10    NA
 2: 2004-01-11          53 2004-01-17    NA
 3: 2004-01-18          47 2004-01-24     1
 4: 2004-01-25          47 2004-01-31    NA
 5: 2004-02-01          46 2004-02-07    NA
 6: 2004-02-08          46 2004-02-14    NA
 7: 2004-02-15          47 2004-02-21    NA
 8: 2004-02-22          49 2004-02-28    NA
 9: 2004-02-29          49 2004-03-06     1
10: 2004-03-07          43 2004-03-13     2
于 2013-03-18T03:29:52.550 回答