1

我有两个由矩阵组成的列表。两个列表的元素名称(即“类别”,例如 region_1、region_2)相同,且顺序一致;列表中的每个元素始终是矩阵。

**mylist_1**            
$region_1           
users   50  20  30
revenue 10000   3500    4000

$region_2           
users   20  20  60
revenue 5000    4000    10000



**mylist_2**            
$region_1           
% female    0.1 0.3 0.8
income  10000   25000   30000

$region_2           
% female    0.5 0.4 0.3
income  50000   20000   23000

我想以 mylist_1 中的用户数(users)作为权重,计算 mylist_2 中各项指标(即女性百分比和平均收入)的加权平均值。mylist_1 中 region_1 的用户数对应 mylist_2 中 region_1 的指标,region_2 同理。因此,对于每个指标的每一列,我们希望得到 region_1 与 region_2 按用户数加权的平均值。例如,收入的第一个数字按如下方式计算:

(50*10000+20*50000)/(50+20)

即输出应如下所示:

输出:

% female    0.21    0.35    0.47
income  21429   22500   25333
4

1 回答 1

0

以下方法可能会有所帮助:

# Example input: `list1` holds users and revenue for each region, `list2`
# holds the metrics (pct_females, income) for the same regions, same order.
mylist <- list(
  list1 = list(
    region_1 = list(
      users = c(20, 50, 100),
      revenue = c(10000, 25000, 15000)
    ),
    region_2 = list(
      users = c(25, 40, 85),
      revenue = c(15000, 15000, 5000)
    )
  ),
  list2 = list(
    region_1 = list(
      pct_females = c(0.2, 0.5, 0.1),
      income = c(10000, 25000, 15000)
    ),
    region_2 = list(
      pct_females = c(0.25, 0.4, 0.85),
      income = c(15000, 15000, 5000)
    )
  )
)


mylist
$list1
$list1$region_1
$list1$region_1$users
[1]  20  50 100

$list1$region_1$revenue
[1] 10000 25000 15000


$list1$region_2
$list1$region_2$users
[1] 25 40 85

$list1$region_2$revenue
[1] 15000 15000  5000



$list2
$list2$region_1
$list2$region_1$pct_females
[1] 0.2 0.5 0.1

$list2$region_1$income
[1] 10000 25000 15000


$list2$region_2
$list2$region_2$pct_females
[1] 0.25 0.40 0.85

$list2$region_2$income
[1] 15000 15000  5000



# Flatten the nested list into one data frame; each leaf vector becomes a
# column named <list>.<region>.<measure>.
ddf <- data.frame(mylist)
ddf
  list1.region_1.users list1.region_1.revenue list1.region_2.users list1.region_2.revenue list2.region_1.pct_females
1                   20                  10000                   25                  15000                        0.2
2                   50                  25000                   40                  15000                        0.5
3                  100                  15000                   85                   5000                        0.1
  list2.region_1.income list2.region_2.pct_females list2.region_2.income
1                 10000                       0.25                 15000
2                 25000                       0.40                 15000
3                 15000                       0.85                  5000
>

# for income: average of the two regions' income, weighted by each region's
# users, evaluated row by row inside `ddf`.
with(
  ddf,
  (list1.region_1.users * list2.region_1.income +
    list1.region_2.users * list2.region_2.income) /
    (list1.region_1.users + list1.region_2.users)
)
[1] 12777.78 20555.56 10405.41

# for percent females: average of the two regions' pct_females, weighted by
# each region's users, evaluated row by row inside `ddf`.
with(
  ddf,
  (list1.region_1.users * list2.region_1.pct_females +
    list1.region_2.users * list2.region_2.pct_females) /
    (list1.region_1.users + list1.region_2.users)
)
[1] 0.2277778 0.4555556 0.4445946

如果把与问题中相同的数据存放在一个数据框中,可以这样计算:

# Long-format data frame: one row per region/metric pair, and one value
# column (val1..val3) per column of the original matrices.
ddf <- data.frame(
  region = rep(c("region_1", "region_2"), each = 3),
  types = rep(c("users", "percent_females", "income"), times = 2),
  val1 = c(50, 0.1, 10000, 20, 0.5, 50000),
  val2 = c(20, 0.3, 25000, 20, 0.4, 20000),
  val3 = c(30, 0.8, 30000, 60, 0.3, 23000),
  stringsAsFactors = FALSE  # keep region/types as character on R < 4.0 too
)

ddf
    region           types  val1    val2    val3
1 region_1           users 5e+01    20.0    30.0
2 region_1 percent_females 1e-01     0.3     0.8
3 region_1          income 1e+04 25000.0 30000.0
4 region_2           users 2e+01    20.0    60.0
5 region_2 percent_females 5e-01     0.4     0.3
6 region_2          income 5e+04 20000.0 23000.0

 # Build a "<region>_<type>" key per row so single rows can be looked up by name.
 ddf[["newcol"]] <- paste(ddf[["region"]], ddf[["types"]], sep = "_")
> 
> ddf
    region           types  val1    val2    val3                   newcol
1 region_1           users 5e+01    20.0    30.0           region_1_users
2 region_1 percent_females 1e-01     0.3     0.8 region_1_percent_females
3 region_1          income 1e+04 25000.0 30000.0          region_1_income
4 region_2           users 2e+01    20.0    60.0           region_2_users
5 region_2 percent_females 5e-01     0.4     0.3 region_2_percent_females
6 region_2          income 5e+04 20000.0 23000.0          region_2_income
> 
# for income:
# Average of region_1 and region_2 income, weighted by each region's users,
# computed for each of the value columns selected by `col`.
# (Console prompts `>` / `+` removed so the snippet can be pasted and run.)
col <- 3:5  # positions of val1, val2, val3 in ddf
(ddf[ddf$newcol == "region_1_users", col] * ddf[ddf$newcol == "region_1_income", col] +
  ddf[ddf$newcol == "region_2_users", col] * ddf[ddf$newcol == "region_2_income", col]) /
  (ddf[ddf$newcol == "region_1_users", col] + ddf[ddf$newcol == "region_2_users", col])
      val1  val2     val3
1 21428.57 22500 25333.33


# for percent females:
# Average of region_1 and region_2 percent_females, weighted by each region's
# users, for the value columns in `col`. Wrapped in local() so the helper and
# intermediates do not leak into the workspace; the result still auto-prints.
local({
  pick <- function(key) ddf[ddf$newcol == key, col]
  users_1 <- pick("region_1_users")
  users_2 <- pick("region_2_users")
  weighted_sum <- users_1 * pick("region_1_percent_females") +
    users_2 * pick("region_2_percent_females")
  weighted_sum / (users_1 + users_2)
})
       val1 val2      val3
1 0.2142857 0.35 0.4666667
于 2014-08-06T17:27:58.320 回答