3

我正在尝试用 R(统计编程语言)编写一个函数,它可以让我自动计算线性回归(lm)

问题:回归是通过 “step” 函数(逐步回归)计算的,因此无法提前知道最终会选中哪些系数。需要解决的具体问题:

  1. 自动识别由 step 函数(逐步回归)选出的系数。

  2. 以类似 Vlookup 的方式,将结果的第二列(例如 “View(OpenCoefs)” 中的 Estimate 估计值列)与原始数据框 “sp” 中对应各列的最后一行(最后一天)交叉相乘。

理想的解决方案是一个函数:我只需键入 “run()”,它就会为每个回归返回 “y”,即对下一个交易日标准普尔 500 指数(开盘价、最低价、最高价、收盘价)的预测。

该代码从 Yahoo Finance 网站检索数据,因此可以直接运行。

这是代码。

# Download the daily ^GSPC price table as CSV from Yahoo Finance.
sp <- read.csv(
  "http://ichart.finance.yahoo.com/table.csv?s=%5EGSPC&a=03&b=1&c=1940&d=03&e=1&f=2014&g=d&ignore=.csv"
)

# The adjusted close is not used anywhere below; remove the column.
sp[["Adj.Close"]] <- NULL

# Reverse the row order and make sure the result is a plain data frame.
# NOTE(review): presumably the download is newest-first and this puts the
# oldest day in row 1 -- confirm against the actual file.
sp <- as.data.frame(sp[seq(nrow(sp), 1), ])


# Row-over-row growth rate of every price/volume column:
#   Gr_<col>[i] = <col>[i] / <col>[i - 1] - 1
# Row 1 has no predecessor and is NA.
#
# Each column is computed as one vectorised expression and assigned once.
# Assigning a single data-frame cell per loop iteration is very slow on a
# long series, and `2:nrow(sp)` would count backwards on a frame with fewer
# than two rows.
for (price_col in c("Open", "High", "Low", "Close", "Volume")) {
  level <- sp[[price_col]]
  growth <- c(NA, level[-1] / level[-length(level)] - 1)
  # Trim to the column length so an empty frame does not receive a stray NA.
  sp[[paste0("Gr_", price_col)]] <- growth[seq_along(level)]
}

# Running row number (1, 2, ..., nrow(sp)), kept both as a free-standing
# vector and as the last column of sp.
nRows_in_sp <- seq(1, nrow(sp))

sp[["nRows_in_sp"]] <- nRows_in_sp


# Rolling mean of each price column over the current row and the 100 rows
# before it (101 observations), stored as <col>_Rollin:
#   row 1            -> NA
#   rows 2..1000     -> 0 (warm-up placeholder)
#   rows 1001..nrow  -> mean(<col>[(i - 100):i])
#
# All four columns go through the same code path. In particular the Close
# warm-up zeros are written to "Close_Rollin" itself; a column name with a
# leading space (" Close_Rollin") would silently create a second, stray
# column and leave the real one NA for rows 2..1000.
#
# NOTE(review): the warm-up cut-off is 1000 rows although the window only
# needs 101 -- confirm that this is intended.
rollin_warmup <- 1000
rollin_back <- 100
rollin_rows <- seq_len(nrow(sp))

for (price_col in c("Open", "Close", "Low", "High")) {
  level <- sp[[price_col]]
  rolling <- rep(NA_real_, length(level))

  # Warm-up rows (row 1 deliberately stays NA).
  rolling[rollin_rows >= 2 & rollin_rows <= rollin_warmup] <- 0

  # Rows with a full window behind them.
  full_rows <- rollin_rows[rollin_rows > rollin_warmup]
  rolling[full_rows] <- vapply(
    full_rows,
    function(i) mean(level[(i - rollin_back):i]),
    numeric(1)
  )

  sp[[paste0(price_col, "_Rollin")]] <- rolling
}


# Rolling mean of each growth-rate column (Gr_<col>) over the current row
# and the 100 rows before it, stored as <col>_GR_Rollin:
#   row 1            -> NA
#   rows 2..1000     -> 0 (warm-up placeholder)
#   rows 1001..nrow  -> mean(Gr_<col>[(i - 100):i])
#
# Each column is built as a vector and assigned once; writing one
# data-frame cell per loop iteration is very slow on a long series and
# `2:nrow(sp)` misbehaves on frames with fewer than two rows.
#
# NOTE(review): the warm-up cut-off is 1000 rows although the window only
# needs 101 -- confirm that this is intended.
gr_rollin_warmup <- 1000
gr_rollin_back <- 100
gr_rollin_rows <- seq_len(nrow(sp))

for (price_col in c("Open", "Close", "Low", "High")) {
  growth <- sp[[paste0("Gr_", price_col)]]
  rolling <- rep(NA_real_, length(growth))

  # Warm-up rows (row 1 deliberately stays NA).
  rolling[gr_rollin_rows >= 2 & gr_rollin_rows <= gr_rollin_warmup] <- 0

  # Rows with a full window behind them.
  full_rows <- gr_rollin_rows[gr_rollin_rows > gr_rollin_warmup]
  rolling[full_rows] <- vapply(
    full_rows,
    function(i) mean(growth[(i - gr_rollin_back):i]),
    numeric(1)
  )

  sp[[paste0(price_col, "_GR_Rollin")]] <- rolling
}


# Rolling standard deviation of each price column over the current row and
# the 100 rows before it, stored as <col>_SD_Rollin:
#   rows 1..100    -> NA (window not yet available)
#   rows 101..nrow -> sd(<col>[(i - 100):i])
#
# Each column is built as a vector and assigned once instead of writing one
# data-frame cell per loop iteration.
sd_rollin_back <- 100
sd_rollin_rows <- seq_len(nrow(sp))
sd_full_rows <- sd_rollin_rows[sd_rollin_rows > sd_rollin_back]

for (price_col in c("Open", "Close", "Low", "High")) {
  level <- sp[[price_col]]
  rolling_sd <- rep(NA_real_, length(level))
  rolling_sd[sd_full_rows] <- vapply(
    sd_full_rows,
    function(i) sd(level[(i - sd_rollin_back):i]),
    numeric(1)
  )
  sp[[paste0(price_col, "_SD_Rollin")]] <- rolling_sd
}


# One-row lags of the price, growth-rate, rolling-growth-mean and rolling-SD
# columns. Every lag is kept as a free-standing vector (model formulas
# written without `data =` look these names up in the workspace) and is also
# appended to sp as a column of the same name.
N <- nrow(sp)

# Shift a vector down by one position, padding the front with NA.
# `head(x, -1)` replaces `x[1:(N - 1)]`, which indexes incorrectly when the
# vector has fewer than two elements; the final subscript keeps the result
# the same length as the input even for empty input.
lag_one <- function(x) {
  c(NA, head(x, -1))[seq_along(x)]
}

# Price levels.
Openlag  <- lag_one(sp[["Open"]])
Highlag  <- lag_one(sp[["High"]])
Lowlag   <- lag_one(sp[["Low"]])
Closelag <- lag_one(sp[["Close"]])

# Growth rates.
Gr_Openlag   <- lag_one(sp[["Gr_Open"]])
Gr_Highlag   <- lag_one(sp[["Gr_High"]])
Gr_Lowlag    <- lag_one(sp[["Gr_Low"]])
Gr_Closelag  <- lag_one(sp[["Gr_Close"]])
Gr_Volumelag <- lag_one(sp[["Gr_Volume"]])

# Rolling means of the growth rates.
Open_GR_Rollinlag  <- lag_one(sp[["Open_GR_Rollin"]])
Low_GR_Rollinlag   <- lag_one(sp[["Low_GR_Rollin"]])
High_GR_Rollinlag  <- lag_one(sp[["High_GR_Rollin"]])
Close_GR_Rollinlag <- lag_one(sp[["Close_GR_Rollin"]])

# Rolling standard deviations of the price levels.
Open_SD_Rollinlag  <- lag_one(sp[["Open_SD_Rollin"]])
Low_SD_Rollinlag   <- lag_one(sp[["Low_SD_Rollin"]])
High_SD_Rollinlag  <- lag_one(sp[["High_SD_Rollin"]])
Close_SD_Rollinlag <- lag_one(sp[["Close_SD_Rollin"]])

# Append all lag columns in one call; cbind() names each new column after
# the variable passed in, and the column order is the argument order.
sp <- cbind(
  sp,
  Openlag, Highlag, Lowlag, Closelag,
  Gr_Openlag, Gr_Highlag, Gr_Lowlag, Gr_Closelag, Gr_Volumelag,
  Open_GR_Rollinlag, Low_GR_Rollinlag, High_GR_Rollinlag, Close_GR_Rollinlag,
  Open_SD_Rollinlag, Low_SD_Rollinlag, High_SD_Rollinlag, Close_SD_Rollinlag
)




# Stepwise linear regressions of today's Open / Low / High / Close on the
# lagged predictors stored in sp.
#
# The models are fit on a data frame with incomplete rows removed up front.
# step() has to refit every candidate model on the same observations; when
# NA rows are dropped implicitly by lm(), removing a predictor can change
# which rows are usable and step() then stops with "number of rows in use
# has changed". The full model uses exactly the same rows as before, because
# lm() would have dropped the same incomplete rows itself.
#
# The selected models are kept (OpenStep, LowStep, HighStep, CloseStep) so
# that fitted()/predict() can be used on them; the *Coefs tables are the
# coefficient summaries of those models, as before.
lag_predictors <- c(
  "Openlag", "Lowlag", "Highlag", "Closelag",
  "Gr_Openlag", "Gr_Lowlag", "Gr_Highlag", "Gr_Closelag", "Gr_Volumelag",
  "Open_GR_Rollinlag", "Low_GR_Rollinlag", "High_GR_Rollinlag",
  "Close_GR_Rollinlag",
  "Open_SD_Rollinlag", "Low_SD_Rollinlag", "High_SD_Rollinlag",
  "Close_SD_Rollinlag"
)

model_data <- na.omit(sp[, c("Open", "Low", "High", "Close", lag_predictors)])

OpenStep <- step(
  lm(reformulate(lag_predictors, response = "Open"), data = model_data),
  direction = "both", test = "F"
)
OpenCoefs <- coefficients(summary(OpenStep))

LowStep <- step(
  lm(reformulate(lag_predictors, response = "Low"), data = model_data),
  direction = "both", test = "F"
)
LowCoefs <- coefficients(summary(LowStep))

HighStep <- step(
  lm(reformulate(lag_predictors, response = "High"), data = model_data),
  direction = "both", test = "F"
)
HighCoefs <- coefficients(summary(HighStep))

CloseStep <- step(
  lm(reformulate(lag_predictors, response = "Close"), data = model_data),
  direction = "both", test = "F"
)
CloseCoefs <- coefficients(summary(CloseStep))


# Open the four coefficient tables and the full data frame in the data
# viewer for inspection.
View(OpenCoefs)

View(LowCoefs)

View(HighCoefs)

View(CloseCoefs)

View(sp)
4

1 回答 1

6

你的代码太糟糕了,我不得不同情你。:) 这是您的代码的重构版本:

library(quantmod)
# Fetch the ^GSPC series and return it as an object (auto.assign = FALSE)
# rather than having getSymbols() create a variable itself.
sp <- getSymbols(Symbols = "^GSPC", auto.assign = FALSE)

# The adjusted close is not used below.
sp$GSPC.Adjusted <- NULL

# Strip the "GSPC." prefix so the columns are just Open, High, Low, ...
colnames(sp) <- sub("^GSPC\\.", "", colnames(sp))

# One-period discrete rate of change (x[t] / x[t-1] - 1) of each
# price/volume column, stored as Gr_<col>.
sp$Gr_Open <- ROC(sp[, "Open"], n = 1, type = "discrete")
sp$Gr_High <- ROC(sp[, "High"], n = 1, type = "discrete")
sp$Gr_Low <- ROC(sp[, "Low"], n = 1, type = "discrete")
sp$Gr_Close <- ROC(sp[, "Close"], n = 1, type = "discrete")
sp$Gr_Volume <- ROC(sp[, "Volume"], n = 1, type = "discrete")

# Window length (in rows) shared by every rolling statistic below.
N <- 100

# Rolling means of the price levels.
sp$Open_Rollin <- runMean(sp[, "Open"], n = N)
sp$High_Rollin <- runMean(sp[, "High"], n = N)
sp$Low_Rollin <- runMean(sp[, "Low"], n = N)
sp$Close_Rollin <- runMean(sp[, "Close"], n = N)

# Rolling means of the growth rates.
sp$Open_GR_Rollin <- runMean(sp[, "Gr_Open"], n = N)
sp$High_GR_Rollin <- runMean(sp[, "Gr_High"], n = N)
sp$Low_GR_Rollin <- runMean(sp[, "Gr_Low"], n = N)
sp$Close_GR_Rollin <- runMean(sp[, "Gr_Close"], n = N)

# Rolling standard deviations of the price levels.
sp$Open_SD_Rollin <- runSD(sp[, "Open"], n = N)
sp$High_SD_Rollin <- runSD(sp[, "High"], n = N)
sp$Low_SD_Rollin <- runSD(sp[, "Low"], n = N)
sp$Close_SD_Rollin <- runSD(sp[, "Close"], n = N)

# Lag every column by one period and tag the copies with a "lag" suffix.
spLag <- lag(sp, k = 1)
colnames(spLag) <- paste0(colnames(sp), "lag")

# Put current and lagged columns side by side, then drop every row that
# still contains an NA.
sp <- merge(sp, spLag)
sp <- na.omit(sp)

没有必要回答你的第一个问题来回答你的第二个问题。您不必手动将系数与数据交叉相乘。您可以简单地从模型中访问拟合值。这需要您保留模型...

# Model formula: today's Open regressed on all lagged predictors.
f <- reformulate(
  c(
    "Openlag", "Lowlag", "Highlag", "Closelag",
    "Gr_Openlag", "Gr_Lowlag", "Gr_Highlag", "Gr_Closelag", "Gr_Volumelag",
    "Open_GR_Rollinlag", "Low_GR_Rollinlag", "High_GR_Rollinlag",
    "Close_GR_Rollinlag",
    "Open_SD_Rollinlag", "Low_SD_Rollinlag", "High_SD_Rollinlag",
    "Close_SD_Rollinlag"
  ),
  response = "Open"
)

# For each response: fit the full model, then run stepwise selection on it.
# The High/Low/Close models reuse the Open model's right-hand side and data
# and only swap the response.
OpenLM <- lm(f, data = sp)
OpenStep <- step(OpenLM, direction = "both", test = "F")

HighLM <- update(OpenLM, High ~ .)
HighStep <- step(HighLM, direction = "both", test = "F")

LowLM <- update(OpenLM, Low ~ .)
LowStep <- step(LowLM, direction = "both", test = "F")

CloseLM <- update(OpenLM, Close ~ .)
CloseStep <- step(CloseLM, direction = "both", test = "F")

# Fitted value of each selected model for the last row of the data; the
# commented figures are the sample output shown with the original code.
# NOTE(review): fitted() returns in-sample fits; a forecast for a day that
# is not in the data would need predict() with newdata -- confirm which one
# is wanted.
tail(fitted(OpenStep), n = 1L)
# 2013-02-01
#    1497.91
tail(fitted(HighStep), n = 1L)
# 2013-02-01
#    1504.02
tail(fitted(LowStep), n = 1L)
# 2013-02-01
#   1491.934
tail(fitted(CloseStep), n = 1L)
# 2013-02-01
#   1499.851
于 2013-02-04T20:49:30.070 回答