我正在尝试清理 Dodger Loop 数据并为训练做准备:对于任何小于 18 的记录值,都用每 5 分钟的平均汽车数量 19 来替换。我已经处理了所有缺失数据,但似乎无法弄清楚如何把 -1 到 17 之间的值替换为 19。
# Set the working directory ("xxx" appears to be a redacted placeholder path).
setwd("xxx")
# Import the traffic data (dodger.csv) and store it as `Traffic`.
# header = FALSE makes read.csv treat the first line as data, not column names.
# The path must be a quoted string and separated from `header` by a comma.
Traffic <- read.csv("xxx", header = FALSE)
View(Traffic)
# Import the Dodgers game data (dodger.events) and store it as `Games`,
# again reading the first line as data.
Games <- read.csv("xxx", header = FALSE)
View(Games)
#Load the class, tm, data.table, ggplot2 and caret packages
library(class)
library(tm)
library(data.table)
library(ggplot2)
library(caret)
# View the data structure of the traffic data (column types and first values)
str(Traffic)
# Per-column summary of the traffic data; for numeric columns this reports
# the median and mean, which are candidates for filling in missing values
summary(Traffic)
# Same structure and summary inspection for the game data
str(Games)
summary(Games)
# Drop the unneeded columns 5 and 6 in a single step.
Games <- Games[-c(5, 6)]
View(Games)
# Label the first four columns in one vectorised assignment; column 2 ends up
# as "Start", matching the final name the column-by-column version produced.
names(Games)[1:4] <- c("Date", "Start", "End", "Attendance")
View(Games)
# Give the first two Traffic columns descriptive names, showing the data
# frame before and after the rename.
View(Traffic)
names(Traffic)[1:2] <- c("Date and Time", "Amount")
View(Traffic)
# Find the mean of Traffic. Without na.rm the result is NA because the column
# contains missing values (console output when first run: [1] NA).
mean(Traffic$Amount)
# Ignoring the missing values gives a usable mean
# (console output when first run: [1] 18.95073).
mean(Traffic$Amount, na.rm = TRUE)
# The average is about 18.95 cars per 5 minutes, which rounds to 19.
# Replace NA readings with the average cars per 5 minutes. ave() with no
# grouping variable returns the overall mean repeated for every row.
Avg_Traffic <- ave(Traffic$Amount, FUN = function(x) mean(x, na.rm = TRUE))
Traffic$Amount <- ifelse(is.na(Traffic$Amount), Avg_Traffic, Traffic$Amount)
# Replace every reading below 18 (per the notes above this covers the -1
# placeholders for missing data up to 17) with the rounded average of 19.
# which() drops NA comparisons, so the assignment is safe even if a value
# could not be filled in above.
Traffic$Amount[which(Traffic$Amount < 18)] <- 19