0

我需要逐月获取 TOTAL 列的过去 30 年平均值。

数据集在此处可用:

library(dplyr)

# Download the monthly Nino 3.4 table from NOAA CPC. The first line of the
# file holds the column names, hence header = TRUE.
ENSO <- read.table(
  file = "http://www.cpc.ncep.noaa.gov/products/analysis_monitoring/ensostuff/detrend.nino34.ascii.txt",
  header = TRUE
)

# Print a compact, column-by-column overview of what was read.
ENSO %>% glimpse()

例如,对于 2021 年 9 月,我需要计算:

 $$
 \frac{TOTAL_{\text{Sep-2021}} + TOTAL_{\text{Sep-2020}} + TOTAL_{\text{Sep-2019}} + \dots + TOTAL_{\text{Sep-1992}}}{30}
 $$

我尝试过使用 dplyr::mutate,但我认为 slider 或 zoo 包配合某个条件可能会有所帮助,因为时间序列从 1950 年 1 月开始,最初的那些年份显然凑不满 30 年,无法计算逐月(MoM)的 30 年平均值。

4

4 回答 4

0

谢谢你!

我通过下面的代码得到了所需的新变量:

library(dplyr)
library(fpp3)

# Load the NOAA CPC monthly Nino 3.4 table and derive the analysis columns,
# then turn the result into a tsibble indexed by month.
#
# slide_dbl() lives in the slider package and rollapplyr() in zoo. Neither is
# attached by dplyr or fpp3, so both are called with an explicit namespace;
# this also avoids any masking between the packages.
ENSO <- read.table(
  "http://www.cpc.ncep.noaa.gov/products/analysis_monitoring/ensostuff/detrend.nino34.ascii.txt",
  header = TRUE
) %>%
  mutate(
    # Year-month index built from the YR and MON columns.
    Dates = yearmonth(paste(YR, "-", MON)),
    # Human-readable label, e.g. "January / 1950".
    Month_Year = paste(month.name[month(Dates)], "/", year(Dates)),
    # Month-over-month change of TOTAL (first row is NA).
    diff_total = difference(TOTAL),
    # Anomaly: TOTAL minus the climatology supplied in the file.
    ANOM = round(TOTAL - ClimAdjust, digits = 2),
    # Centred three-month mean of ANOM:
    #   TMA_t = (ANOM_{t-1} + ANOM_t + ANOM_{t+1}) / 3
    # At the two ends of the series the window is shorter, because slide_dbl()
    # uses partial windows by default.
    TMA = round(
      slider::slide_dbl(ANOM, mean, .before = 1, .after = 1),
      digits = 2
    ),
    # "La Niña" when the five consecutive months ending at a row all have
    # TMA < -0.5, "El Niño" when they all have TMA > 0.5, NA otherwise.
    # lag() then shifts the label down by one row, so each month carries the
    # classification of the five months before it.
    `Climatic Condition` = lag(
      case_when(
        zoo::rollapplyr(TMA < -0.5, 5, all, fill = FALSE) ~ "La Niña",
        zoo::rollapplyr(TMA > 0.5, 5, all, fill = FALSE) ~ "El Niño"
      )
    ),
    # Three-month season label centred on each month (January -> "DJF", ...),
    # looked up by month number.
    `3 months` = c(
      "DJF", "JFM", "FMA", "MAM", "AMJ", "MJJ",
      "JJA", "JAS", "ASO", "SON", "OND", "NDJ"
    )[month(Dates)]
  ) %>%
  as_tsibble(index = Dates)

  # Put the columns into their presentation order.
  ENSO <- select(
    ENSO,
    Dates, Month_Year, YR, MON, TOTAL, ClimAdjust, ANOM, TMA,
    `3 months`, `Climatic Condition`, diff_total
  )


  # Mean of TOTAL for each calendar month (one row per MON value).
  #
  # ENSO is a tsibble at this point, and summarise() on a tsibble keeps the
  # time index: grouping the tsibble by MON would still return one row per
  # date, making the "mean" equal to TOTAL itself. Converting to a plain
  # tibble first gives the intended per-month aggregate.
  #
  # NOTE(review): this averages every year present in the data; filter on YR
  # beforehand if a fixed 30-year base period is wanted.
  ClimAdj <- ENSO %>%
    as_tibble() %>%
    group_by(MON) %>%
    summarise(ClimAdj = mean(TOTAL))

  # Attach the monthly mean to every row. MON is the only shared column, so
  # the join adds just ClimAdj and no MON.x / MON.y clean-up is needed.
  ENSO <- left_join(ENSO, ClimAdj, by = "MON")

      # Final column layout. The ClimAdjust column from the source file is
      # left out; the computed ClimAdj column takes its place.
      ENSO <- select(
        ENSO,
        Dates, Month_Year, YR, MON, TOTAL, ClimAdj, ANOM, TMA,
        `3 months`, `Climatic Condition`, diff_total
      )

      # Show the resulting structure.
      glimpse(ENSO)
于 2021-10-14T17:13:47.110 回答
0

您可以将当前年份固定为 2021 年,也可以直接取表中最大的年份作为当前年份。

然后,只需过滤掉不晚于“当前年份减 30”的年份,即只保留最近 30 年的数据。

如果您想对此进行修饰,您甚至可以为您的列使用自定义名称。

这是代码:

# Length of the averaging window, in years.
n_years <- 30

# Use the most recent year in the table as the end of the window.
current_y <- max(ENSO$YR)

# First year that is actually included. The window is inclusive on both ends,
# so the column label built from it matches the rows kept by the filter.
first_y <- current_y - n_years + 1
col_name <- paste0("total_mean_", first_y, "_to_", current_y)

# Mean TOTAL per calendar month over the last n_years years; the result
# column is named dynamically from col_name.
ENSO %>%
  filter(YR >= first_y) %>%
  group_by(MON) %>%
  summarise(!!col_name := mean(TOTAL))

# # A tibble: 12 x 2
# #     MON total_mean_1992_to_2021
# #   <int>                   <dbl>
# # 1     1                    26.5
# # 2     2                    26.7
# # 3     3                    27.3
# # 4     4                    27.8
# # 5     5                    27.9
# # 6     6                    27.7
# # 7     7                    27.3
# # 8     8                    26.8
# # 9     9                    26.7
# #10    10                    26.7
# #11    11                    26.7
# #12    12                    26.5
于 2021-10-14T13:47:14.110 回答
0

这是一种data.table方法。

步骤:
1 - 按月份将表格拆分为一个由 12 个子表组成的列表
2 - 在每个月份的子表内,计算最近 30 条记录(即该月份过去 30 年)的滚动平均值
3 - 将各月份的子表按行重新合并为一个表格

library(data.table)

# Convert ENSO to a data.table by reference, keyed by year then month.
setDT(ENSO, key = c("YR", "MON"))

# Split into one sub-table per calendar month and add MON30_avg to each:
# the rolling mean of TOTAL over the 30 most recent rows of that month.
L <- lapply(
  split(ENSO, by = "MON"),
  function(month_dt) {
    month_dt[, MON30_avg := frollmean(TOTAL, n = 30)]
  }
)

# Stack the monthly sub-tables back into a single table.
final <- rbindlist(L, use.names = TRUE, fill = TRUE)
于 2021-10-14T13:54:50.880 回答
-1

这是更新的代码。您首先按年和月排列,然后切片最后 360 个月(30 年)!然后按月分组,然后计算平均值:

# Mean TOTAL per calendar month over the 360 most recent monthly rows
# (30 years), after sorting chronologically.
ENSO %>%
  arrange(YR, MON) %>%
  slice_tail(n = 360) %>%
  group_by(MON) %>%
  summarise(mean(TOTAL))

希望这是你想要的。每个月都有过去 30 年的平均值。

于 2021-10-14T13:19:43.350 回答