0

我有一组存在缺口(缺失值)的时间序列数据。我对它应用移动平均;为了减少因此丢失数据的天数,我在平滑之前先用 na.approx() 进行插值。绘图时我不想显示这些插值,所以之后又把它们重新替换为 NA。为了实现这一切,我的代码非常冗长。

我一直在尝试减少代码行数并简化流程,并了解到可以直接在 ggplot 中调用 rollmean()。但是,由于它不会对缺口进行插值,我因此损失了 9 天“好”的数据。这一点很重要,因为我们的数据具有很强的季节性模式,而这样会丢失其中的一部分。

# Reproducible example data in dput() form: a data.frame with two columns.
#   Date  - consecutive daily dates, stored as day counts with class "Date".
#   value - numeric observations containing one contiguous run of NA values
#           (the gap in the time series that the question is about).
# The row names are arbitrary character labels, presumably left over from
# the data set this sample was extracted from; nothing below relies on them.
df<-structure(list(Date = structure(c(16375, 16376, 16377, 16378, 
16379, 16380, 16381, 16382, 16383, 16384, 16385, 16386, 16387, 
16388, 16389, 16390, 16391, 16392, 16393, 16394, 16395, 16396, 
16397, 16398, 16399, 16400, 16401, 16402, 16403, 16404, 16405, 
16406, 16407, 16408, 16409, 16410, 16411, 16412, 16413, 16414, 
16415, 16416, 16417, 16418, 16419, 16420, 16421, 16422, 16423, 
16424, 16425, 16426, 16427, 16428, 16429, 16430, 16431, 16432, 
16433, 16434, 16435, 16436, 16437, 16438, 16439, 16440, 16441, 
16442, 16443, 16444, 16445, 16446, 16447, 16448, 16449, 16450, 
16451, 16452, 16453, 16454, 16455, 16456, 16457, 16458, 16459, 
16460, 16461, 16462, 16463, 16464, 16465, 16466, 16467, 16468, 
16469, 16470, 16471, 16472, 16473, 16474, 16475, 16476, 16477, 
16478, 16479, 16480, 16481, 16482, 16483, 16484, 16485, 16486, 
16487, 16488, 16489, 16490, 16491, 16492, 16493, 16494, 16495, 
16496, 16497, 16498, 16499, 16500, 16501, 16502, 16503, 16504, 
16505, 16506, 16507, 16508, 16509, 16510, 16511, 16512, 16513, 
16514, 16515, 16516, 16517, 16518, 16519, 16520, 16521, 16522, 
16523, 16524, 16525, 16526, 16527, 16528, 16529, 16530, 16531, 
16532, 16533, 16534, 16535, 16536, 16537, 16538, 16539, 16540, 
16541, 16542, 16543, 16544, 16545, 16546, 16547, 16548, 16549, 
16550), class = "Date"), value = c(97.525, 96.95, 96.445, 96.795, 
97, 96.56, 96.615, 96.915, 96.51, 96.84, 96.98, 97.075, 96.88, 
96.98, 96.69, 96.86, 97.155, 97.355, 97.805, 97.805, 97.91, 97.185, 
96.7, 97.755, 98.035, 99.905, 99.665, 100.825, 100.36, 100.045, 
99.28, 101.225, 98.12, 98.045, 100.215, 100.96, 99.365, 104.225, 
99.53, 101.24, 101.24, 100.23, 101.17, 103.605, 101.12, 101.095, 
104.31, 103.665, 102.21, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, 114.205, 115.435, 117, 116.715, 116.59, 117.8, 116.85, 
118.27, 118.18, 119.83, 118.39, 117.17, 116.49, 117.035, 114.945, 
116.24, 117.565, 117.31, 118.38, 117.815, 115.73, 116.39, 117.005, 
116.86, 117.33, 116.08, 114.82, 114.775, 116.06, 115.005, 117.055, 
118.215, 116.93, 116.085, 118, 117.965, 118.385, 118.425, 118.67, 
115.485, 116.5, 118.07, 116.435, 116.48, 115.7, 115.185, 112.34, 
112.16, 114.505, 115.125, 115.115, 114.57, 112.53, 115.175, 114.02, 
112.075, 113.09, 113.31, 112.81, 111.095, 109.465, 109.13, 111.565, 
110.275, 110.525, 109.565, 111.14, 107.375, 108.18, 108.085, 
106.91, 105.3, 107.21, 108.375, 105.79, 107.04, 107.83, 108.59, 
107.665, 105.125, 104.445, 104.825, 105.605)), row.names = c("32", 
"33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", 
"44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", 
"55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65", 
"66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", 
"77", "78", "79", "80", "110", "210", "310", "410", "510", "610", 
"710", "810", "98", "101", "111", "121", "131", "141", "151", 
"161", "171", "181", "191", "201", "211", "221", "231", "241", 
"251", "261", "271", "281", "291", "301", "311", "321", "331", 
"341", "351", "361", "371", "381", "391", "401", "411", "421", 
"431", "441", "112", "212", "312", "412", "511", "611", "711", 
"811", "99", "102", "113", "122", "132", "142", "152", "162", 
"172", "182", "192", "202", "213", "222", "232", "242", "252", 
"262", "272", "282", "292", "302", "313", "322", "332", "342", 
"352", "362", "372", "382", "392", "402", "413", "422", "432", 
"442", "451", "461", "471", "481", "491", "501", "512", "521", 
"531", "541", "551", "561", "571", "581", "591", "601", "612", 
"621", "631", "641", "651", "661", "671", "681", "691", "701", 
"712", "721", "731", "741", "751", "761", "771", "781", "791", 
"801", "812", "821", "831"), class = "data.frame")

# Load dependencies with library() so that a missing package stops the script
# immediately (require() would only return FALSE and fail later on).
# ggplot2 is loaded here because the plotting code below calls ggplot().
library(zoo)
library(TTR)
library(ggplot2)

# Convert the data frame into a zoo series indexed by its Date column.
df.ts <- read.zoo(df, format = "%Y-%m-%d")
# Fill missing values by linear interpolation before smoothing; rule = 2
# extends the nearest observed value to any leading/trailing NAs.
df.ts.na <- na.approx(df.ts, rule = 2)
# 10-observation simple moving average of the gap-filled series.
df.SMA10 <- SMA(df.ts.na, n = 10)

# Back to a data frame so the smoothed series can be handed to ggplot.
df.ts.df <- data.frame(Date = as.Date(time(df.SMA10)), value = df.SMA10)

# Flag the days whose raw observation was missing (1 = interpolated gap,
# 0 = observed). The flag is derived from the NA positions of the original
# series and matched by date, instead of hard-coding the first/last gap date
# and looking them up in the row names; this also copes with several gaps,
# with a gap touching either end of the series, and with no gap at all.
gap.dates <- time(df.ts)[is.na(coredata(df.ts))]
df.ts.df$gap <- as.numeric(df.ts.df$Date %in% gap.dates)
# Optional: blank out the smoothed values inside the gap instead of colouring
# them differently.
#df.ts.df$value[df.ts.df$gap == 1] <- NA

# Overlay three lines on a shared date axis:
#   1. solid line   - moving average of the interpolated series (df.ts.df),
#                     coloured by the numeric `gap` flag so the interpolated
#                     stretch stands out from the observed one;
#   2. orange line  - the raw observations from `df`;
#   3. red dot-dash - rollmean() evaluated on the raw values directly inside
#                     aes(). No interpolation happens here, so (as described
#                     in the question) the first days after the gap are lost.
# TRUE is spelled out instead of T, because T is an ordinary variable that
# can be reassigned.
ggplot(data = df.ts.df, aes(x = Date, y = value, col = gap)) +
  geom_line(size = 0.75, na.rm = TRUE) +
  geom_line(
    data = df,
    aes(x = Date, y = value),
    color = "orange",
    na.rm = TRUE
  ) +
  geom_line(
    data = df,
    aes(x = Date, y = rollmean(value, 10, align = "right", fill = NA)),
    color = "red",
    size = 0.85,
    linetype = "dotdash",
    na.rm = TRUE
  ) +
  theme_classic() +
  # The colour legend for the 0/1 gap flag carries no information; hide it.
  theme(legend.position = "none")

有没有办法通过在 rollmean 中包含 na.approx 来解决这个问题?

为了说明我的意思:橙色线是原始时间序列;蓝色/黑色实线是 na.approx() 与 SMA() 组合的结果,其中蓝色部分是插值部分。红色虚线来自直接在 ggplot 中调用 rollmean()。

时间序列的ggplot

我想在 ggplot 内部使用 rollmean(),绘制一条尽可能接近黑色实线的红色虚线。

4

0 回答 0