对于此时间序列分析,您可以使用自回归模型。首先,您必须创建滞后输入集并创建一个数据框。在下面的代码块中有 4 个输入集,其中包含一个滞后、两个滞后和三个滞后。(阅读有关自回归模型的更多信息 - [https://otexts.com/fpp2/AR .html][1])
exchangeEUR <- read_excel("ExchangeUSD.xlsx") %>%
janitor::clean_names() %>%
mutate(date_in_ymd = ymd(yyyy_mm_dd)) %>%
select(-1) %>%
select(date_in_ymd,everything())
eur_exchange_full = exchangeEUR %>%
mutate(previous_one_day_set_a = lag(exchangeEUR$usd_eur,1),
previous_one_day_set_b = lag(exchangeEUR$usd_eur,1),
previous_two_day_set_b = lag(exchangeEUR$usd_eur,2),
previous_one_day_set_c = lag(exchangeEUR$usd_eur,1),
previous_two_day_set_c = lag(exchangeEUR$usd_eur,2),
previous_three_day_set_c = lag(exchangeEUR$usd_eur,3),
previous_one_day_set_d = lag(exchangeEUR$usd_eur,1),
previous_two_day_set_d = lag(exchangeEUR$usd_eur,2),
five_day_rolling = rollmean(usd_eur,5, fill = NA),
ten_day_rolling = rollmean(usd_eur,10, fill = NA)) %>%
drop_na()
规范化数据
# We can create a function to normalize the data from 0 to 1
normalize <- function(x) {
return ((x - min(x)) / (max(x) - min(x))) }
# All the variables are normalized
normalized_eur = eur_exchange_full %>%
mutate(across(2:12, ~normalize(.x)))
# Look at the data that has been normalized
summary(normalized_eur)
boxplot(normalized_eur$usd_eur)
set.seed(123)
eur_train <- normalized_eur[1:400,]
eur_test <- normalized_eur[401:491,]
# We can create a function to unnormalize the data=
unnormalize <- function(x, min, max) {
return( (max - min)*x + min ) }
# Get the min and max of the original training values
eur_min_train <- min(eur_exchange_full[1:400,2])
eur_max_train <- max(eur_exchange_full[1:400,2])
# Get the min and max of the original testing values
eur_min_test <- min(eur_exchange_full[401:491,2])
eur_max_test <- max(eur_exchange_full[401:491,2])
# Check the range of the min and max of the training dataset
eur_min_test
eur_min_train
eur_max_test
eur_max_train
测试不同架构的神经网络
set.seed(12345)
# function setup that creates 2 layer model
model_two_hidden_layers = function(hidden,sec_hidden) {
nn_model_true = neuralnet(usd_eur ~ previous_one_day_set_b+previous_two_day_set_b, data=eur_train, hidden=c(
hidden,sec_hidden), linear.output=TRUE)
#plot(nn_model_true)
pred <- predict(nn_model_true, eur_test)
validation_df <- data.frame(c(eur_test$date_in_ymd),c(pred),c(eur_test$usd_eur))
p = ggplot() +
geom_line(data = validation_df, aes(x = c.eur_test.date_in_ymd., y = c.pred.), color = "blue") +
geom_line(data = validation_df, aes(x = c.eur_test.date_in_ymd., y = c.eur_test.usd_eur.), color = "red") +
xlab('Dates') +
ylab('percent.change')
print(p)
train_results = compute(nn_model_true,eur_test[,2:3])
truthcol = eur_exchange_full[401:491,2]$usd_eur
predcol = unnormalize(train_results$net.result,eur_min_train, eur_max_train)[,1]
relevant_pred_stat(truthcol,predcol,
"Two Hidden Layers") %>%
mutate(hiddel_layers = paste0(hidden, " and ",sec_hidden),
input_set = "B") %>%
filter(.metric != "rsq")
}
model_two_hidden_layers(2,3)
# save the stat indices to a dataframe
set_a_models_two_layers = results_two_hidden_layers %>%
select(-estimator) %>%
pivot_wider(names_from = metric, values_from = estimate) %>%
arrange(rmse)
kable(set_a_models_two_layers[1:10,])
##########################################################################
# three layer model
set.seed(12345)
# function setup that creates 3 layer model
model_three_hidden_layers = function(hidden,sec_hidden,third_hidden) {
nn_model_true = neuralnet(usd_eur ~ previous_one_day_set_b+previous_two_day_set_b, data=eur_train, hidden=c(hidden,sec_hidden,third_hidden), linear.output=TRUE)
#plot(nn_model_true)
pred <- predict(nn_model_true, eur_test)
validation_df <- data.frame(c(eur_test$date_in_ymd),c(pred),c(eur_test$usd_eur))
################
p = ggplot() +
geom_line(data = validation_df, aes(x = c.eur_test.date_in_ymd., y = c.pred.), color = "blue") +
geom_line(data = validation_df, aes(x = c.eur_test.date_in_ymd., y = c.eur_test.usd_eur.), color = "red") +
xlab('Dates') +
ylab('percent.change')
print(p)
################
train_results = compute(nn_model_true,eur_test[,2:3])
truthcol = eur_exchange_full[401:491,2]$usd_eur
predcol = unnormalize(train_results$net.result,eur_min_train, eur_max_train)[,1]
relevant_pred_stat(truthcol,predcol,
"Three Hidden Layers") %>%
mutate(hiddel_layers = paste0(hidden, " and ",sec_hidden," and ",third_hidden),
input_set = "A") %>%
filter(.metric != "rsq")
}
model_three_hidden_layers(7,4,1)