3

我试图在从政府机构收到的数据框中重命名所有这些糟糕的列名。

> colnames(thedata)
 [1] "Region"                                      "Resource Assessment Site ID"                
 [3] "Site Name/Facility"                          "Design Head (feet)"                         
 [5] "Design Flow (cfs)"                           "Installed Capacity (kW)"                    
 [7] "Annual Production (MWh)"                     "Plant Factor"                               
 [9] "Total Construction Cost (1,000 $)"           "Annual O&M Cost (1,000 $)"                  
[11] "Cost per Installed Capacity ($/kW)"          "Benefit Cost Ratio with Green Incentives"   
[13] "IRR with Green Incentives"                   "Benefit Cost Ratio without Green Incentives"
[15] "IRR without Green Incentives" 

列标题中包含特殊的非字母数字字符和空格,几乎无法直接引用,所以我必须重命名它们。我想用句点替换所有非字母数字字符。我尝试了下面的代码,但没有效果:

old.col.names <- colnames(thedata)
new.col.names <- gsub("^a-z0-9", ".", old.col.names)

^ 是“非”的意思,所以我以为它会把 old.col.names 中所有不是字母或数字的字符都替换为句点。

有人能帮忙吗?

4

1 回答 1

0

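你的模式 "^a-z0-9" 没有放在方括号里,此时 ^ 表示字符串开头的锚点,而不是“非”,所以 gsub 找不到任何匹配,列名保持不变;要表示“不属于这些字符”,需要写成 [^A-Za-z0-9]。以下是三个可供考虑的选项: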

# Option 1 (base R): make.names() turns each string into a syntactically valid R name;
# in the output shown below, every space and punctuation character became ".".
make.names(x)
# Option 2 (base R): the bracket expression [^A-Za-z0-9] matches any single character
# that is not an ASCII letter or digit; gsub() replaces every such match with ".".
gsub("[^A-Za-z0-9]", ".", x)
# Option 3 (janitor package): clean_names() operates on a data frame, so a placeholder
# one-row data frame is built with `x` as its column names, cleaned, and its names read back.
names(janitor::clean_names(setNames(data.frame(matrix(NA, ncol = length(x))), x)))

以下是每种方法的输出结果:

make.names(x)
##  [1] "Region"                                      "Resource.Assessment.Site.ID"                
##  [3] "Site.Name.Facility"                          "Design.Head..feet."                         
##  [5] "Design.Flow..cfs."                           "Installed.Capacity..kW."                    
##  [7] "Annual.Production..MWh."                     "Plant.Factor"                               
##  [9] "Total.Construction.Cost..1.000..."           "Annual.O.M.Cost..1.000..."                  
## [11] "Cost.per.Installed.Capacity....kW."          "Benefit.Cost.Ratio.with.Green.Incentives"   
## [13] "IRR.with.Green.Incentives"                   "Benefit.Cost.Ratio.without.Green.Incentives"
## [15] "IRR.without.Green.Incentives"               

gsub("[^A-Za-z0-9]", ".", x)
##  [1] "Region"                                      "Resource.Assessment.Site.ID"                
##  [3] "Site.Name.Facility"                          "Design.Head..feet."                         
##  [5] "Design.Flow..cfs."                           "Installed.Capacity..kW."                    
##  [7] "Annual.Production..MWh."                     "Plant.Factor"                               
##  [9] "Total.Construction.Cost..1.000..."           "Annual.O.M.Cost..1.000..."                  
## [11] "Cost.per.Installed.Capacity....kW."          "Benefit.Cost.Ratio.with.Green.Incentives"   
## [13] "IRR.with.Green.Incentives"                   "Benefit.Cost.Ratio.without.Green.Incentives"
## [15] "IRR.without.Green.Incentives"               

library(janitor)
names(clean_names(setNames(data.frame(matrix(NA, ncol = length(x))), x)))
##  [1] "region"                                      "resource_assessment_site_id"                
##  [3] "site_name_facility"                          "design_head_feet"                           
##  [5] "design_flow_cfs"                             "installed_capacity_kw"                      
##  [7] "annual_production_mwh"                       "plant_factor"                               
##  [9] "total_construction_cost_1_000"               "annual_o_m_cost_1_000"                      
## [11] "cost_per_installed_capacity_kw"              "benefit_cost_ratio_with_green_incentives"   
## [13] "irr_with_green_incentives"                   "benefit_cost_ratio_without_green_incentives"
## [15] "irr_without_green_incentives"               

示例数据:

# Sample input: the 15 column names from the question, stored as a character vector `x`
# (this is the `x` passed to each of the three options above).
x <- c("Region", "Resource Assessment Site ID", "Site Name/Facility", 
    "Design Head (feet)", "Design Flow (cfs)", "Installed Capacity (kW)", 
    "Annual Production (MWh)", "Plant Factor", "Total Construction Cost (1,000 $)", 
    "Annual O&M Cost (1,000 $)", "Cost per Installed Capacity ($/kW)", 
    "Benefit Cost Ratio with Green Incentives", "IRR with Green Incentives", 
    "Benefit Cost Ratio without Green Incentives", "IRR without Green Incentives")
于 2018-02-21T18:20:08.090 回答