我写了这段代码,把每个成交量除以对应时刻(hour)的平均值,然后希望把结果放回最初的(长)数据格式中,但我没能做到。有人能给我一些建议吗?我是否需要再次使用 reshape() 或 dcast()?
# Load the 1-minute bar file into a long table: one row per (date, minute).
# The file has no header row, so the four column names are supplied here.
AA <- read.table(
  "AA_NYS_USD_1min_EST5EDT_200407s12e.dat",
  header = FALSE,
  as.is = TRUE,
  col.names = c("date", "hour", "price", "volume")
)
str(AA)

# Parse the dd.mm.YYYY text into Date objects.
AA$date <- as.Date(AA$date, format = "%d.%m.%Y")

# Reduce HH:MM:SS to an "HH:MM" character label.
AA$hour <- format(strptime(AA$hour, format = "%H:%M:%S"), "%H:%M")

# Numeric id of each minute label (factor levels are the sorted labels).
AA$hourid <- as.numeric(as.factor(AA$hour))
head(AA, 3)
date hour price volume hourid
1 01.07.2004 09:31 51.37 27900 1
2 01.07.2004 09:32 51.32 100 2
3 01.07.2004 09:33 51.45 700 3
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the script fail later.
library(reshape2)
library(reshape)

# Wide layout: one row per date, one column per "HH:MM" label (the hour text
# is used as the column name), cells hold the volume. The row key must be
# `date`, which is the column name assigned when AA was read.
AA2 <- dcast(AA, date ~ hour, value.var = "volume")
head(AA2[, 1:8], 3)
date 09:31 09:32 09:33 09:34 09:35 09:36 09:37
1 2004-07-01 27900 100 700 1000 800 3000 1600
2 2004-07-02 145400 500 NA 500 6900 NA 4400
3 2004-07-06 3200 2300 3800 1500 NA 1100 3700
library(dplyr)

# Mean volume for each minute-of-day label, ignoring missing volumes.
# Result columns: `hour` (label) and `mean`.
a <- AA %>%
  group_by(hour) %>%
  summarise(mean = mean(volume, na.rm = TRUE))

# Normalise the wide table: divide every minute column by that minute's mean.
# The first column of AA2 is the row key, so it is skipped. Means are looked
# up by column name rather than by position, which avoids the off-by-one
# between AA2's columns and a's rows and removes the hard-coded column count.
hour_cols <- names(AA2)[-1]
hour_means <- a$mean[match(hour_cols, a$hour)]
AA2[hour_cols] <- Map(`/`, AA2[hour_cols], hour_means)
# Normalised volume in the original long layout: each row's volume divided by
# the mean volume of its minute label. match() lines every row of AA up with
# its entry in `a`, so no nested loop and no reshaping back is needed.
hour_mean <- a$mean[match(AA$hour, a$hour)]
AA$volumeMean <- AA$volume / hour_mean

# Write the long table (original columns plus volumeMean). No object named
# AA3 is created anywhere in this script; the result lives in AA.
write.table(AA, "AA_Complete.txt")
谢谢!!!