以下方法可能会有所帮助:
# Example input: two parallel nested lists keyed by region.
#   list1 holds per-region user counts and revenue,
#   list2 holds per-region female share and income.
# Every leaf is a length-3 numeric vector.
mylist <- list(
  list1 = list(
    region_1 = list(
      users   = c(20, 50, 100),
      revenue = c(10000, 25000, 15000)
    ),
    region_2 = list(
      users   = c(25, 40, 85),
      revenue = c(15000, 15000, 5000)
    )
  ),
  list2 = list(
    region_1 = list(
      # NOTE(review): the third share is 0.1 (originally written `.100`);
      # confirm it was not meant to be 1.00.
      pct_females = c(0.2, 0.5, 0.1),
      income      = c(10000, 25000, 15000)
    ),
    region_2 = list(
      pct_females = c(0.25, 0.40, 0.85),
      income      = c(15000, 15000, 5000)
    )
  )
)
mylist
$list1
$list1$region_1
$list1$region_1$users
[1] 20 50 100
$list1$region_1$revenue
[1] 10000 25000 15000
$list1$region_2
$list1$region_2$users
[1] 25 40 85
$list1$region_2$revenue
[1] 15000 15000 5000
$list2
$list2$region_1
$list2$region_1$pct_females
[1] 0.2 0.5 0.1
$list2$region_1$income
[1] 10000 25000 15000
$list2$region_2
$list2$region_2$pct_females
[1] 0.25 0.40 0.85
$list2$region_2$income
[1] 15000 15000 5000
# Flatten the nested list into a single data frame: each leaf vector becomes
# one column whose name is the dot-joined path to it
# (e.g. list1.region_1.users).
ddf <- data.frame(mylist)
ddf
list1.region_1.users list1.region_1.revenue list1.region_2.users list1.region_2.revenue list2.region_1.pct_females
1 20 10000 25 15000 0.2
2 50 25000 40 15000 0.5
3 100 15000 85 5000 0.1
list2.region_1.income list2.region_2.pct_females list2.region_2.income
1 10000 0.25 15000
2 25000 0.40 15000
3 15000 0.85 5000
>
# for income: mean income across the two regions, weighted by user counts
with(
  ddf,
  (list1.region_1.users * list2.region_1.income +
    list1.region_2.users * list2.region_2.income) /
    (list1.region_1.users + list1.region_2.users)
)
[1] 12777.78 20555.56 10405.41
# for percent females: female share across the two regions, weighted by
# user counts
with(
  ddf,
  (list1.region_1.users * list2.region_1.pct_females +
    list1.region_2.users * list2.region_2.pct_females) /
    (list1.region_1.users + list1.region_2.users)
)
[1] 0.2277778 0.4555556 0.4445946
对于具有相同信息的数据框:
# The same kind of information in long format: one row per (region, type)
# pair and one numeric column per value set (val1..val3).
# stringsAsFactors = FALSE keeps region/types as character vectors on every
# R version.
ddf <- data.frame(
  region = c(
    "region_1", "region_1", "region_1",
    "region_2", "region_2", "region_2"
  ),
  types = c(
    "users", "percent_females", "income",
    "users", "percent_females", "income"
  ),
  val1 = c(50, 0.1, 10000, 20, 0.5, 50000),
  val2 = c(20, 0.3, 25000, 20, 0.4, 20000),
  val3 = c(30, 0.8, 30000, 60, 0.3, 23000),
  stringsAsFactors = FALSE
)
ddf
region types val1 val2 val3
1 region_1 users 5e+01 20.0 30.0
2 region_1 percent_females 1e-01 0.3 0.8
3 region_1 income 1e+04 25000.0 30000.0
4 region_2 users 2e+01 20.0 60.0
5 region_2 percent_females 5e-01 0.4 0.3
6 region_2 income 5e+04 20000.0 23000.0
# Build a single lookup key per row, "<region>_<type>"
# (e.g. "region_1_users"), so a row can be selected with one comparison.
ddf$newcol <- with(ddf, paste(region, types, sep = "_"))
>
> ddf
region types val1 val2 val3 newcol
1 region_1 users 5e+01 20.0 30.0 region_1_users
2 region_1 percent_females 1e-01 0.3 0.8 region_1_percent_females
3 region_1 income 1e+04 25000.0 30000.0 region_1_income
4 region_2 users 2e+01 20.0 60.0 region_2_users
5 region_2 percent_females 5e-01 0.4 0.3 region_2_percent_females
6 region_2 income 5e+04 20000.0 23000.0 region_2_income
>
# for income: mean income across both regions, weighted by user counts,
# computed separately for each value column selected by `col`.
# (Console prompts "> " / "+ " removed so the snippet can be pasted and run.)
col <- 3:5  # column positions of val1..val3 in ddf
(ddf[ddf$newcol == "region_1_users", col] *
  ddf[ddf$newcol == "region_1_income", col] +
  ddf[ddf$newcol == "region_2_users", col] *
    ddf[ddf$newcol == "region_2_income", col]) /
  (ddf[ddf$newcol == "region_1_users", col] +
    ddf[ddf$newcol == "region_2_users", col])
val1 val2 val3
1 21428.57 22500 25333.33
# for percent females: female share across both regions, weighted by each
# region's user count, computed for every column selected by `col`.
local({
  # Rows of ddf whose newcol equals `key`, restricted to the `col` columns.
  pick <- function(key) ddf[ddf$newcol == key, col]
  users_r1 <- pick("region_1_users")
  users_r2 <- pick("region_2_users")
  (users_r1 * pick("region_1_percent_females") +
    users_r2 * pick("region_2_percent_females")) /
    (users_r1 + users_r2)
})
val1 val2 val3
1 0.2142857 0.35 0.4666667