我有以下哈希:
h = Hash["a","foo", "b","bar", "c","foo"]
我想返回出现次数最多的值,在本例中是 foo。最有效的方法是什么?
与此问题类似,但适用于哈希。
您可以先把哈希的值取出为数组,然后套用您所链接问题中的解决方案。
h.values.group_by { |e| e }.values.max_by(&:size).first
#=> foo
可以这样做:
h = Hash["a","foo", "b","bar", "c","foo", "d", "bar", 'e', 'foobar']
p h.values.group_by { |e| e }.max_by{|_,v| v.size}.first
# >> "foo"
更新(比我的第一个解决方案慢)
h = Hash["a","foo", "b","bar", "c","foo"]
h.group_by { |_,v| v }.max_by{|_,v| v.size}.first
# >> "foo"
基准
require 'benchmark'
# Returns the value that occurs most often among the values of +h+.
#
# The values are bucketed into arrays of equal elements; the first element of
# the largest bucket is the answer.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] when +h+ is empty (there is no largest bucket)
def seanny123(h)
  buckets = h.values.group_by { |value| value }
  largest = buckets.values.max_by { |bucket| bucket.size }
  largest.first
end
# Returns the value that occurs most often among the values of +h+.
#
# Builds a value => occurrence-count table in a single pass and then picks the
# entry with the highest count.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] the most frequent value, or nil when +h+ is empty
def stefan(h)
  # The accumulator is named `counts` so it does not shadow the method
  # argument `h`; the key is unused and therefore underscored.
  frequencies = h.each_with_object(Hash.new(0)) { |(_key, value), counts| counts[value] += 1 }
  most_common, _count = frequencies.max_by { |_value, count| count }
  most_common
end
# Returns the value that occurs most often among the values of +h+.
#
# Groups the key/value pairs by value, orders the groups by ascending size and
# answers the key of the last (largest) group.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] when +h+ is empty (there is no last group)
def yevgeniy_anfilofyev(h)
  pairs_by_value = h.group_by { |_key, value| value }
  ascending = pairs_by_value.sort_by { |_value, pairs| pairs.size }
  ascending.last.first
end
# Returns every value of +h+ that shares the highest occurrence count.
#
# Array#count is called once per element in each of the two passes; that
# quadratic behaviour is kept as is because this method is a benchmark subject.
#
# @param h [Hash] hash whose values are inspected
# @return [Array] the distinct most frequent values (empty for an empty hash)
def acts_as_geek(h)
  values = h.values
  occurrence_counts = values.map { |value| values.count(value) }
  highest = occurrence_counts.max
  winners = values.select { |value| values.count(value) == highest }
  winners.uniq
end
# Returns one of the most frequent values of +h+ via a pairwise reduction.
#
# The current leader is only kept while it occurs strictly more often than the
# next candidate. Array#count runs twice per step, which is kept as is because
# this method is a benchmark subject.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] a most frequent value, or nil when +h+ is empty
def squiguy(h)
  values = h.values
  values.inject do |leader, candidate|
    if values.count(leader) > values.count(candidate)
      leader
    else
      candidate
    end
  end
end
# Returns the value that occurs most often among the values of +h+.
#
# Groups the extracted values by themselves and answers the key of the group
# holding the most members.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] when +h+ is empty (there is no largest group)
def babai1(h)
  grouped = h.values.group_by { |value| value }
  top_entry = grouped.max_by { |_value, members| members.size }
  top_entry.first
end
# Returns the value that occurs most often among the values of +h+.
#
# Groups the key/value pairs directly (without extracting the values first)
# and answers the key of the group holding the most pairs.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] when +h+ is empty (there is no largest group)
def babai2(h)
  pairs_by_value = h.group_by { |_key, value| value }
  top_entry = pairs_by_value.max_by { |_value, pairs| pairs.size }
  top_entry.first
end
# Prints a Benchmark.bm table (label column width 20) that times +n+
# invocations of every candidate solution against the same hash.
#
# @param h [Hash] hash handed to each candidate
# @param n [Integer] number of times each candidate is invoked
def benchmark(h, n)
  Benchmark.bm(20) do |bm|
    bm.report("Seanny123") do
      n.times { seanny123(h) }
    end
    bm.report("Stefan") do
      n.times { stefan(h) }
    end
    bm.report("Yevgeniy Anfilofyev") do
      n.times { yevgeniy_anfilofyev(h) }
    end
    bm.report("acts_as_geek") do
      n.times { acts_as_geek(h) }
    end
    bm.report("squiguy") do
      n.times { squiguy(h) }
    end
    bm.report("Babai1") do
      n.times { babai1(h) }
    end
    bm.report("Babai2") do
      n.times { babai2(h) }
    end
  end
end
# Benchmark fixture: 10 repetitions over a 3000-entry hash whose values are
# "foo" for the a*/c* keys and "bar" for the b* keys.
n = 10
h = {}
1000.times do |i|
  [%w[a foo], %w[b bar], %w[c foo]].each do |prefix, word|
    h["#{prefix}#{i}"] = word
  end
end
benchmark(h, n)
结果:
user system total real
Seanny123 0.020000 0.000000 0.020000 ( 0.015550)
Stefan 0.040000 0.000000 0.040000 ( 0.044666)
Yevgeniy Anfilofyev 0.020000 0.000000 0.020000 ( 0.023162)
acts_as_geek 16.160000 0.000000 16.160000 ( 16.223582)
squiguy 15.740000 0.000000 15.740000 ( 15.768917)
Babai1 0.020000 0.000000 0.020000 ( 0.015430)
Babai2 0.020000 0.000000 0.020000 ( 0.025711)
用一个小哈希:
# Benchmark fixture: many repetitions over a three-entry hash.
n = 100_000
h = { "a" => "foo", "b" => "bar", "c" => "foo" }
benchmark(h, n)
结果:
user system total real
Seanny123 0.220000 0.000000 0.220000 ( 0.222342)
Stefan 0.260000 0.000000 0.260000 ( 0.263583)
Yevgeniy Anfilofyev 0.350000 0.000000 0.350000 ( 0.341685)
acts_as_geek 0.300000 0.000000 0.300000 ( 0.306601)
squiguy 0.140000 0.000000 0.140000 ( 0.139141)
Babai 0.220000 0.000000 0.220000 ( 0.218616)
使用大哈希:
# Benchmark fixture: 10 repetitions over a 3000-entry hash whose values are
# "foo" for the a*/c* keys and "bar" for the b* keys.
n = 10
h = {}
1000.times do |i|
  [%w[a foo], %w[b bar], %w[c foo]].each do |prefix, word|
    h["#{prefix}#{i}"] = word
  end
end
benchmark(h, n)
结果:
user system total real
Seanny123 0.060000 0.000000 0.060000 ( 0.059068)
Stefan 0.100000 0.000000 0.100000 ( 0.100760)
Yevgeniy Anfilofyev 0.080000 0.000000 0.080000 ( 0.080988)
acts_as_geek 97.020000 0.020000 97.040000 ( 97.072220)
squiguy 97.480000 0.020000 97.500000 ( 97.535130)
Babai 0.050000 0.000000 0.050000 ( 0.058653)
基准代码:
require 'benchmark'
# Returns the value that occurs most often among the values of +h+.
#
# The values are bucketed into arrays of equal elements; the first element of
# the largest bucket is the answer.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] when +h+ is empty (there is no largest bucket)
def seanny123(h)
  buckets = h.values.group_by { |value| value }
  largest = buckets.values.max_by { |bucket| bucket.size }
  largest.first
end
# Returns the value that occurs most often among the values of +h+.
#
# Builds a value => occurrence-count table in a single pass and then picks the
# entry with the highest count.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] the most frequent value, or nil when +h+ is empty
def stefan(h)
  # The accumulator is named `counts` so it does not shadow the method
  # argument `h`; the key is unused and therefore underscored.
  frequencies = h.each_with_object(Hash.new(0)) { |(_key, value), counts| counts[value] += 1 }
  most_common, _count = frequencies.max_by { |_value, count| count }
  most_common
end
# Returns the value that occurs most often among the values of +h+.
#
# Groups the key/value pairs by value, orders the groups by ascending size and
# answers the key of the last (largest) group.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] when +h+ is empty (there is no last group)
def yevgeniy_anfilofyev(h)
  pairs_by_value = h.group_by { |_key, value| value }
  ascending = pairs_by_value.sort_by { |_value, pairs| pairs.size }
  ascending.last.first
end
# Returns every value of +h+ that shares the highest occurrence count.
#
# Array#count is called once per element in each of the two passes; that
# quadratic behaviour is kept as is because this method is a benchmark subject.
#
# @param h [Hash] hash whose values are inspected
# @return [Array] the distinct most frequent values (empty for an empty hash)
def acts_as_geek(h)
  values = h.values
  occurrence_counts = values.map { |value| values.count(value) }
  highest = occurrence_counts.max
  winners = values.select { |value| values.count(value) == highest }
  winners.uniq
end
# Returns one of the most frequent values of +h+ via a pairwise reduction.
#
# The current leader is only kept while it occurs strictly more often than the
# next candidate. Array#count runs twice per step, which is kept as is because
# this method is a benchmark subject.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] a most frequent value, or nil when +h+ is empty
def squiguy(h)
  values = h.values
  values.inject do |leader, candidate|
    if values.count(leader) > values.count(candidate)
      leader
    else
      candidate
    end
  end
end
# Returns the value that occurs most often among the values of +h+.
#
# Groups the extracted values by themselves and answers the key of the group
# holding the most members.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] when +h+ is empty (there is no largest group)
def babai(h)
  grouped = h.values.group_by { |value| value }
  top_entry = grouped.max_by { |_value, members| members.size }
  top_entry.first
end
# Prints a Benchmark.bm table (label column width 20) that times +n+
# invocations of every candidate solution against the same hash.
#
# @param h [Hash] hash handed to each candidate
# @param n [Integer] number of times each candidate is invoked
def benchmark(h, n)
  Benchmark.bm(20) do |bm|
    bm.report("Seanny123") do
      n.times { seanny123(h) }
    end
    bm.report("Stefan") do
      n.times { stefan(h) }
    end
    bm.report("Yevgeniy Anfilofyev") do
      n.times { yevgeniy_anfilofyev(h) }
    end
    bm.report("acts_as_geek") do
      n.times { acts_as_geek(h) }
    end
    bm.report("squiguy") do
      n.times { squiguy(h) }
    end
    bm.report("Babai") do
      n.times { babai(h) }
    end
  end
end
您可以使用 Enumerable#inject 计算各个值出现的频率:
# Count how often each value occurs. The accumulator is named `counts` so it
# does not shadow the outer hash `h`; the unused key is underscored.
frequencies = h.inject(Hash.new(0)) { |counts, (_key, value)| counts[value] += 1; counts }
#=> {"foo"=>2, "bar"=>1}
或者使用 Enumerable#each_with_object:
# Count how often each value occurs. The accumulator is named `counts` so it
# does not shadow the outer hash `h`; the unused key is underscored.
frequencies = h.each_with_object(Hash.new(0)) { |(_key, value), counts| counts[value] += 1 }
#=> {"foo"=>2, "bar"=>1}
然后使用 Enumerable#max_by 取出出现次数最多的一项:
value, count = frequencies.max_by { |k, v| v }
#=> ["foo", 2]
value
#=> "foo"
使用 group_by 但不使用 values,并配合 sort_by:
h.group_by{|(_,v)| v }
.sort_by{|(_,v)| v.size }[-1][0]
更新:不要使用我的解决方案。我的计算机需要耗费大量时钟周期才能算出结果。没想到函数式的写法在这里会这么慢。
# Demo: reduce the value list pairwise, keeping the current leader only while
# it occurs strictly more often than the next candidate, then print the result.
h = { "a" => "foo", "b" => "bar", "c" => "foo" }
v = h.values
most = v.inject do |leader, candidate|
  if v.count(leader) > v.count(candidate)
    leader
  else
    candidate
  end
end
p most
需要注意的是,如果哈希中有两个或更多的值并列拥有相同的“最大”出现次数,这种方法只会返回其中一个值。如果您需要所有出现次数最多的值,那么这个解决方案并不适用。
这是一个基准。
#!/usr/bin/env ruby
require 'benchmark'
# Number of times each candidate is executed inside its benchmark report.
MULT = 10

# 10,000 randomly sampled lowercase letters used as the hash values.
letters = ("a".."z").to_a
arr = Array.new(10_000) { letters.sample }

# The hash must be created before it is indexed; without this assignment the
# first `h[i] = ...` raises NameError because `h` is undefined.
h = {}
arr.each_with_index { |letter, i| h[i] = letter }
# Times every proposed solution MULT times against the hash `h` and prints a
# Benchmark.bm report with one row per contributor. The expressions inside the
# reports are the measured subjects and are intentionally left inline.
Benchmark.bm do |rep|
# Group equal values and take the first element of the largest group.
rep.report("Seanny123") {
MULT.times do
h.values.group_by { |e| e }.values.max_by(&:size).first
end
}
# Pairwise reduction; every step calls Array#count twice on the full value list.
rep.report("squiguy") {
MULT.times do
v = h.values
most = v.reduce do |memo, val|
v.count(memo) > v.count(val) ? memo : val
end
end
}
# Computes the highest occurrence count, then selects every value that reaches
# it; Array#count is called once per element in both passes.
rep.report("acts_as_geek") {
MULT.times do
v = h.values
max = v.map {|i| v.count(i)}.max
v.select {|i| v.count(i) == max}.uniq
end
}
# Group the key/value pairs by value, sort the groups by size and take the key
# of the last one.
rep.report("Yevgeniy Anfilofyev") {
MULT.times do
h.group_by{|(_,v)| v }
.sort_by{|(_,v)| v.size }[-1][0]
end
}
# Build a value => count table with inject, then pick the entry with the
# highest count. Inside the inject block `h` is the accumulator and shadows
# the outer hash. The destructured result is not used afterwards.
rep.report("Stefan") {
MULT.times do
frequencies = h.inject(Hash.new(0)) { |h, (k,v)| h[v] += 1 ; h }
value, count = frequencies.max_by { |k, v| v }
end
}
# Group equal values and take the key of the group with the most members.
rep.report("Babai") {
MULT.times do
h.values.group_by { |e| e }.max_by{|_,v| v.size}.first
end
}
end