r - 一个我无法理解的奇怪 lapply 问题

Question

我有一个长度为 2 的 list，名为“hhvrs”，其中每个元素都是带名称的数值向量。这两个列表元素的名称是年份“1920”和“1929”。

$`1920`
      Nykvarn - 147 - 211920       Nykvarn - 262 - 211920 ...
                    1.235629                     1.013191 ...
$`1929`
        Långed - 125 - 11929         Långed - 126 - 11929 ...
                    1.316499                     1.026785 ...

我还有一个名为“data”的 data.frame，包含 1920 年和 1929 年两年的数据。请参阅本文底部的 dput 输出。
然后我想对名称做反向匹配（即排除出现在上面 list 中的那些名称）。换句话说，我想只保留数据框最后一列 uniquezCorrectCG 中那些不在上面 list 里的名称。然后，我想为这些名称未出现在列表中的公司计算效率。

这是我的代码：

# For each distinct year in `data`: drop the rows whose uniquezCorrectCG
# appears among the names of that year's entry in `hhvrs`, run dea() with
# RTS="vrs" on the remaining rows, and collect the efficiency vectors
# (e$eff) in a list that is then named by year.
hhvrsu=lapply(unique(data$year),function(x){
library(Benchmarking)
datat=data[data$year==x,]
# NOTE(review): `x` is an element of unique(data$year), and the dput of
# `data` shows `year` as integer. `[[` with a number subscripts by position,
# not by the name "1920"/"1929", so this lookup presumably does not return
# the intended list element and the exclusion has no effect. Looking it up
# with hhvrs[[as.character(x)]] selects by name instead.
datat2=datat[!(datat$uniquezCorrectCG %in% names(hhvrs[[x]])),]  

# output: the Ouput_ton column; drop=FALSE keeps it as a one-column data.frame
y <- datat2[,"Ouput_ton",drop=FALSE]
# columns 5 and 4 are plant_name and plant_code according to the dput of `data`
rownames(y)=paste(datat2[,5],"-",datat2[,4])

# inputs: four input columns bound into a matrix.
# From here on `x` no longer holds the year; it is overwritten by the matrix.
  x=with(datat2,
    cbind(Labour_input_1000_hour,
          Capital_input_1000_sek,
          Electric_input_Mwh,
          Rawmaterial_input_M3))

# column 3 is year_cg_code according to the dput of `data`
rownames(x)=paste(datat2[,5],"-",datat2[,4],"-",datat2[,3])
e <- dea(x,y,RTS="vrs")
return(e$eff) }            
)    
# label each list element with its year
names(hhvrsu)=unique(data$year)

但这失败了。例如，1929 年的公司 Långed - 125 - 11929 仍然出现在我的代码输出中，但它应该被删除，因为 Långed - 125 - 11929 存在于上面的列表中......

head(hhvrsu[["1929"]])

Billingsfors - 123 - 11929 Billingsfors - 124 - 11929       Långed - 125 - 11929       Långed - 126 - 11929       Långed - 127 - 11929 
                 0.9975506                  1.0000000                  1.0000000                  1.0000000                  1.0000000 
    Hånsfors - 183 - 21929 
                 0.9928677

但是，如果我手动执行，它却可以正常工作：

# Manual run for the single year 1929: same steps as the lapply body, but the
# hhvrs element is looked up by its name "1929".
datat <- data[data$year == 1929, ]

# Keep only the rows whose identifier has no match among the names of
# hhvrs[["1929"]].
datat2 <- datat[is.na(match(datat$uniquezCorrectCG, names(hhvrs[["1929"]]))), ]

# Output: the Ouput_ton column, kept as a one-column data.frame.
y <- datat2[, "Ouput_ton", drop = FALSE]
rownames(y) <- paste(datat2[[5]], "-", datat2[[4]])

# Inputs: four input columns bound into a matrix.
x <- with(
  datat2,
  cbind(
    Labour_input_1000_hour,
    Capital_input_1000_sek,
    Electric_input_Mwh,
    Rawmaterial_input_M3
  )
)
rownames(x) <- paste(datat2[[5]], "-", datat2[[4]], "-", datat2[[3]])

e <- dea(x, y, RTS = "vrs")

head(e$eff)
Billingsfors - 123 - 11929 Billingsfors - 124 - 11929     Hånsfors - 183 - 21929    Hällefors - 237 - 21929     Grycksbo - 350 - 21929 
                 0.9984071                  1.0000000                  1.0000000                  0.5863832                  0.9813024 
     Brättne - 100 - 31929 
                 0.9915349

在上面的 e$eff 中，Långed - 125 - 11929 被删除了！

EDIT：

如果我在下面的代码中使用 as.character(x)，而不是直接使用 x：

library(Benchmarking)  # provides dea()

# Per-year DEA efficiencies for the plants that are NOT already listed in
# `hhvrs`.
#
# For each distinct year in `data`, the rows whose uniquezCorrectCG appears
# among the names of that year's `hhvrs` entry are dropped, dea() is run with
# RTS = "vrs" on the remaining rows, and the efficiency vector is returned.
# The result is a list with one element per year, named by year.
hhvrsu <- lapply(unique(data$year), function(x) {
  year_rows <- data[data$year == x, ]

  # `hhvrs` is keyed by the year as a string ("1920", "1929"). Subscripting
  # `[[` with the numeric year would select by position, so convert the year
  # to character to look the element up by name.
  already_listed <- names(hhvrs[[as.character(x)]])
  kept <- year_rows[!(year_rows$uniquezCorrectCG %in% already_listed), ]

  # Output: the Ouput_ton column, kept as a one-column data.frame.
  # Columns 5 and 4 are plant_name and plant_code per the dput of `data`.
  y <- kept[, "Ouput_ton", drop = FALSE]
  rownames(y) <- paste(kept[, 5], "-", kept[, 4])

  # Inputs: stored under their own name so the year argument `x` is not
  # overwritten. Column 3 is year_cg_code per the dput of `data`.
  inputs <- with(
    kept,
    cbind(
      Labour_input_1000_hour,
      Capital_input_1000_sek,
      Electric_input_Mwh,
      Rawmaterial_input_M3
    )
  )
  rownames(inputs) <- paste(kept[, 5], "-", kept[, 4], "-", kept[, 3])

  dea(inputs, y, RTS = "vrs")$eff
})
names(hhvrsu) <- unique(data$year)

有什么建议么？

输出：

dput(hhvrs)
structure(list(`1920` = structure(c(1.23562876282578, 1.01319073788091, 
1.55783496400001, 1.06191988898698, 1.12744927131341, 1.08504615635299, 
1.25725741409574, 2.03370195312046, 1.00667697472372, 1.00260726981462, 
1.3050604346423, 1.3594555255334, 1.55671945006842, 1.0072581093466, 
1.65164991096899, 2.47385616808447, 1.18471196771314, 1.24186522915967, 
1.65133103063843, Inf, 1.16498198151401, 1.07017484481922), .Names = c("Nykvarn - 147 - 211920", 
"Nykvarn - 262 - 211920", "Tumba - 68 - 381920", "Byske - 294 - 451920", 
"Långed - 127 - 571920", "Väja - 270 - 691920", "Ljusfors - 141 - 731920", 
"Skärblacka - 370 - 731920", "Sätra - 152 - 781920", "Krokfors - 129 - 871920", 
"Åsen - 207 - 1011920", "Åsen - 208 - 1011920", "Lagerfors - 225 - 10121920", 
"Lindefors - 243 - 10281920", "Munksjö - 253 - 10281920", "Qvill - 211 - 10431920", 
"Esseltewell - 375 - 10521920", "Esseltewell - 376 - 10521920", 
"Ulriksfors - 205 - 10541920", "Sellnäs - 352 - 10541920", "Vivstavarv - 314 - 10751920", 
"Älvsborg - 369 - 10791920")), `1929` = structure(c(1.31649939189229, 
1.02678542256861, 1.50667886828221, 1.06101596031178, 1.00477142430659, 
Inf, 1.00038550231904, 1.10347307305662, 1.53782048667181, 1.80890790261425, 
1.06103833744605, 1.00036736526695, 1.01053736983199, 1.01119078294682, 
1.00295000872313, 1.01778128036389, 1.22049428994262, 1.15078822074877, 
1.00346763843347, 1.2192497185324, 1.03195112444193, 1.71491513543284, 
1.00168840525869, 1.00575972592046, 1.105483053952, 1.00427057272637, 
1.94482017228275, 1.00388363163126), .Names = c("Långed - 125 - 11929", 
"Långed - 126 - 11929", "Långed - 127 - 11929", "Hällefors - 234 - 21929", 
"Göteborg-Dals - 156 - 91929", "Papyrus - 280 - 231929", "Sofiehem - 330 - 271929", 
"Tollare - 66 - 361929", "Tumba - 68 - 381929", "Alstermo - 4 - 491929", 
"Billerud - 106 - 571929", "Fengersfors - 135 - 711929", "Gamlestaden - 153 - 821929", 
"Gransholm - 228 - 851929", "Åsen - 207 - 1011929", "Nykvarn - 262 - 1101929", 
"Haga - 24 - 10041929", "Ljusne - 218 - 10181929", "Husum - 232 - 10251929", 
"Munksjö - 253 - 10281929", "Pauliström - 239 - 10311929", "Qvill - 211 - 10431929", 
"Esseltewell - 375 - 10521929", "Ställdalen - 356 - 10531929", 
"Kvarnsveden - 343 - 10541929", "Skutskär - 345 - 10541929", 
"Sellnäs - 352 - 10541929", "Vivstavarv - 314 - 10751929"))), .Names = c("1920", 
"1929"))

Dput data.frame

dput( data[data$year==1929,][1:5,])

structure(list(company_code = c(1L, 1L, 1L, 1L, 1L), company_name = c("AB Billingsfors-Långed", 
"AB Billingsfors-Långed", "AB Billingsfors-Långed", "AB Billingsfors-Långed", 
"AB Billingsfors-Långed"), year_cg_code = c(11929L, 11929L, 11929L, 
11929L, 11929L), plant_code = 123:127, plant_name = c("Billingsfors", 
"Billingsfors", "Långed", "Långed", "Långed"), plant_location = c("Billingsfors", 
"Billingsfors", "Dals Långed", "Dals Långed", "Dals Långed"), 
    plant_location_by_municipal = c("Bengtsfors", "Bengtsfors", 
    "Bengtsfors", "Bengtsfors", "Bengtsfors"), year = c(1929L, 
    1929L, 1929L, 1929L, 1929L), Output_value_1000_sek = c(720L, 
    2304L, 531L, 3040L, 2079L), Labour_cost_1000_sek = c(102L, 
    348L, 93L, 199L, 225L), Capital_cost_1000_sek = c(108L, 468L, 
    126L, 304L, 180L), Electricity_cost_1000_sek = c(130L, 90L, 
    10L, 120L, 40L), Raw_material_cost_1000_sek = c(174L, 744L, 
    177L, 1824L, 1080L), Output_price_1_sek.ton = c(220L, 220L, 
    220L, 220L, 220L), Output_price__sek.ton = c(196L, 196L, 
    196L, 196L, 196L), Labour_price_sek.hour = c(1, 1.208333333, 
    2.657142857, 1.093406593, 2.083333333), Capital_price_interest.rate = c(4.556666667, 
    4.556666667, 4.556666667, 4.556666667, 4.556666667), Motive_Power_pricekr.MwH = c(43.10344828, 
    67.61833208, 31.54574132, 93.45794393, 45.14672686), Electricity_price_kr.MwH = c(24.34456929, 
    24.19354839, 13.88888889, 25.26315789, 22.22222222), Raw_Material_price_kr.m3 = c(14.5, 
    15.5, 11.8, 19, 12), Mean_raw.material_price = c(14.3, 14.3, 
    14.3, 14.3, 14.3), Output_capacity_ton = c(6000L, 12000L, 
    3000L, 9500L, 9000L), Ouput_ton = c(3272L, 10472L, 2413L, 
    13818L, 9450L), Labour_input_1000_hour = c(102L, 288L, 35L, 
    182L, 108L), Capital_input_1000_sek = c(2853L, 1975L, 219L, 
    2634L, 878L), Motive_Power_Mwh = c(3016L, 1331L, 317L, 1284L, 
    886L), Electric_input_Mwh = c(5340, 3720, 720, 4750, 1800
    ), Rawmaterial_input_M3 = c(12000, 48000, 15000, 96000, 90000
    ), Capacity_Utilization = c(54.53333333, 87.26666667, 80.43333333, 
    145.4526316, 105), Labour_cost_share = c(14.16666667, 15.10416667, 
    17.51412429, 6.546052632, 10.82251082), Capital_cost_share = c(15, 
    20.3125, 23.72881356, 10, 8.658008658), Electricity_cost_share = c(18.05555556, 
    3.90625, 1.883239171, 3.947368421, 1.924001924), Raw_Material_cost_share = c(24.16666667, 
    32.29166667, 33.33333333, 60, 51.94805195), Labour_productivity = c(1.433165382, 
    1.624502304, 3.080154233, 3.392008925, 3.909230144), Capital_productivity = c(4.8, 
    22.1, 45.8, 21.9, 44.8), Power_productivity = c(0.24, 1.73, 
    1.68, 2.37, 2.35), Electricity_productivity = c(0.303469526, 
    1.39421497, 1.659846295, 1.440769899, 2.60017364), Raw.material.productivity = c(1.439189112, 
    1.151527229, 0.849086388, 0.759730866, 0.554210966), uniquezCorrect = c("Billingsfors - 123", 
    "Billingsfors - 124", "Långed - 125", "Långed - 126", "Långed - 127"
    ), uniquezCorrectCG = c("Billingsfors - 123 - 11929", "Billingsfors - 124 - 11929", 
    "Långed - 125 - 11929", "Långed - 126 - 11929", "Långed - 127 - 11929"
    )), .Names = c("company_code", "company_name", "year_cg_code", 
"plant_code", "plant_name", "plant_location", "plant_location_by_municipal", 
"year", "Output_value_1000_sek", "Labour_cost_1000_sek", "Capital_cost_1000_sek", 
"Electricity_cost_1000_sek", "Raw_material_cost_1000_sek", "Output_price_1_sek.ton", 
"Output_price__sek.ton", "Labour_price_sek.hour", "Capital_price_interest.rate", 
"Motive_Power_pricekr.MwH", "Electricity_price_kr.MwH", "Raw_Material_price_kr.m3", 
"Mean_raw.material_price", "Output_capacity_ton", "Ouput_ton", 
"Labour_input_1000_hour", "Capital_input_1000_sek", "Motive_Power_Mwh", 
"Electric_input_Mwh", "Rawmaterial_input_M3", "Capacity_Utilization", 
"Labour_cost_share", "Capital_cost_share", "Electricity_cost_share", 
"Raw_Material_cost_share", "Labour_productivity", "Capital_productivity", 
"Power_productivity", "Electricity_productivity", "Raw.material.productivity", 
"uniquezCorrect", "uniquezCorrectCG"), row.names = 6:10, class = "data.frame")

score 1 · Accepted Answer

我的做法会有点不同（根本不使用 lapply）。我会先用 stack 从 hhvrs 构造一个 data.frame，如下所示：

# Flatten `hhvrs` into a two-column lookup table with default row names:
#   year             - the list-element name each value came from (stack's `ind`)
#   uniquezCorrectCG - the name attached to each value (stack's row names)
my.df <- local({
  stacked <- stack(hhvrs)
  data.frame(
    year = stacked[["ind"]],
    uniquezCorrectCG = rownames(stacked)
  )
})

现在找出那些 year 和 uniquezCorrectCG 的组合存在于 data 中、但不存在于 my.df 中的条目。

# Rows of `data` whose (year, uniquezCorrectCG) pair is not a repeat of an
# earlier row once the key columns are stacked underneath `my.df`.
local({
  key_cols <- c("year", "uniquezCorrectCG")
  combined <- rbind(my.df, data[, key_cols])
  # Discard the flags belonging to the my.df rows; the rest line up with `data`.
  repeated <- duplicated(combined)[-seq_len(nrow(my.df))]
  data[!repeated, ]
})

r - 一个我无法理解的奇怪 lapply 问题

1 回答 1

Related

Reference