我想在 R 中优化以下代码:其中的循环需要很长时间才能运行。请问有人能帮我优化这段代码吗?谢谢大家!
`SIN_FM5` 是一个大约有 300,000 行和 7 列的数据框(data.frame)。
# Per-combination summary statistics of SIN_FM5$MONTANT_PAIEMENT.
#
# Objects produced (names, shapes and values are the same as before):
#   SIN_FM5$Combination - grouping key, a copy of SIN_FM5$SINISTRE
#   Count.Comb          - data frame with one row per combination and its
#                         frequency, as returned by data.frame(table(...))
#   Total.Comb          - number of combinations (rows of Count.Comb)
#   Statistics          - Total.Comb x 5 numeric matrix:
#                           [, 1] integer code of the combination's factor
#                                 level (NOT the combination id itself; the
#                                 id is the first column of Count.Comb)
#                           [, 2] mean    [, 3] median
#                           [, 4] min     [, 5] max
#   resultatN           - Count.Comb bound column-wise to Statistics
#
# The amounts are split by combination once, instead of re-filtering the
# whole data frame with subset() for every combination.

# Make Combination
SIN_FM5$Combination <- SIN_FM5$SINISTRE
Count.Comb <- data.frame(table(SIN_FM5$Combination))

# Calculate number of combinations
Total.Comb <- nrow(Count.Comb)

# Calculate statistics for all combinations in a single pass
Statistics <- array(0, dim = c(Total.Comb, 5))
if (Total.Comb > 0) {
  # Reuse the levels found by table() so that group i lines up with row i of
  # Count.Comb; rows whose key is NA or not among the levels are dropped,
  # exactly as the former `==` filter dropped them.
  Amounts.By.Comb <- split(
    SIN_FM5$MONTANT_PAIEMENT,
    factor(SIN_FM5$Combination, levels = levels(Count.Comb[[1]]))
  )

  # c() on a factor and numbers yields the factor's integer code, which is
  # what the first column has always contained.
  Statistics[, 1] <- as.integer(Count.Comb[[1]])

  # vapply() returns a 4 x Total.Comb matrix; transpose to one row per group.
  Statistics[, 2:5] <- t(vapply(
    Amounts.By.Comb,
    function(amounts) {
      c(mean(amounts), median(amounts), min(amounts), max(amounts))
    },
    numeric(4),
    USE.NAMES = FALSE
  ))
}

resultatN <- cbind(Count.Comb, Statistics)
dput(head(SIN_FM5))
控制台~/
"TRSP-5194", "TRSP-5197", "TRSP-5201", "TRSP-5202", "TRSP-5204",
"TRSP-5205", "TRSP-5207", "TRSP-5212", "TRSP-5214", "TRSP-5215",
"TRSP-5218", "TRSP-5222", "TRSP-5230", "TRSP-5238", "TRSP-5243",
"TRSP-5247", "TRSP-5248", "TRSP-5253", "TRSP-5254", "TRSP-5255",
"TRSP-5257", "TRSP-5259", "TRSP-5262", "TRSP-5263", "TRSP-5266",
"TRSP-5267", "TRSP-5268", "TRSP-5270", "TRSP-5271", "TRSP-5274",
"TRSP-5277", "TRSP-5279", "TRSP-5281", "TRSP-5283", "TRSP-5288",
"TRSP-5289", "TRSP-5293", "TRSP-5296", "TRSP-5299", "TRSP-5301",
"TRSP-5303", "TRSP-5304", "TRSP-5306", "TRSP-5308", "TRSP-5310",
"TRSP-5311", "TRSP-5312", "TRSP-5313", "TRSP-5335", "TRSP-5343",
"TRSP-5348", "TRSP-5352", "TRSP-5357", "TRSP-5363", "TRSP-5366",
"TRSP-5372", "TRSP-5373", "TRSP-5384", "TRSP-5386", "TRSP-5388",
"TRSP-5391", "TRSP-5392", "TRSP-5428", "TRSP-5436", "VANBILSENYolanda",
"VanLierop", "VirgaJesseZiekenhuis", "WanetGeorges", "WANETThierry",
"WILLEMSMichel", "WUESTENBERGHSAlain", "X01", "X01CR", "X02CR",
"X03CR", "X04CR", "X05CR", "X06CR", "X07CR", "Y01", "Y01CR",
"Y02", "ZOPO-5344"), class = "factor"), Combination = c(73010009L,
73010009L, 73010014L, 73010014L, 73010014L, 73010014L)), .Names = c("SINISTRE",
"victimeid", "Nature.Injury", "LocationL", "DurationITT", "Code.Nace2008",
"POLICE", "TYPE_DE_PAIEMENT", "MODE_DE_PAIEMENT", "CODE_NATURE_DE_PAIEMENT",
"MONTANT_PAIEMENT", "BENEFICIARE", "Combination"), row.names = c(NA,
6L), class = "data.frame")