ruby - 查找最常见的哈希值

Question

我有以下哈希：h = Hash["a","foo", "b","bar", "c","foo"]。我想返回出现次数最多的值，在本例中是 "foo"。最高效的做法是什么？

与此问题类似，但适用于哈希。

score 4 · Accepted Answer

您可以先把所有的值取出为一个数组，然后套用您所链接问题中的解决方案。

# Take the hash's values, bucket equal values together, pick the largest
# bucket and return one of its (identical) members.
h.values.group_by { |e| e }.values.max_by(&:size).first
#=> "foo"

score 3 · Accepted Answer

可以这样做：

# Sample data in which "foo" and "bar" both occur twice and "foobar" once.
h = { "a" => "foo", "b" => "bar", "c" => "foo", "d" => "bar", "e" => "foobar" }
# Bucket the values, pick the [value, occurrences] pair with the most
# occurrences, and print the value of that pair.
groups = h.values.group_by { |value| value }
p groups.max_by { |_value, occurrences| occurrences.size }.first
# >> "foo"

更新（比我的第一个解决方案慢）

h = { "a" => "foo", "b" => "bar", "c" => "foo" }
# Group the [key, value] pairs by value directly (no intermediate values
# array), then take the value whose group holds the most pairs.
h.group_by { |_key, value| value }
 .max_by { |_value, pairs| pairs.size }
 .first
# >> "foo"

基准

require 'benchmark'

# Returns the value that occurs most often in +h+.
#
# Equal values are bucketed together and a member of the largest bucket is
# returned.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] if +h+ is empty (there is no largest bucket)
def seanny123(h)
  buckets = h.values.group_by { |value| value }
  largest = buckets.values.max_by { |bucket| bucket.size }
  largest.first
end

# Returns the value that occurs most often in +h+ by counting occurrences.
#
# @param h [Hash] hash whose values are counted
# @return [Object, nil] the most frequent value, or nil when +h+ is empty
def stefan(h)
  # Hash.new(0) lets every unseen value start counting from zero.
  frequencies = h.each_with_object(Hash.new(0)) do |(_key, value), counts|
    counts[value] += 1
  end
  # max_by returns the [value, count] pair with the highest count.
  most_common, _count = frequencies.max_by { |_value, count| count }
  most_common
end

# Returns the value that occurs most often in +h+.
#
# The [key, value] pairs are grouped by value, the groups are sorted by size
# in ascending order, and the value of the last (largest) group is returned.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] if +h+ is empty (the sorted list has no last entry)
def yevgeniy_anfilofyev(h)
  pairs_by_value = h.group_by { |_key, value| value }
  ranked = pairs_by_value.sort_by { |_value, pairs| pairs.size }
  ranked[-1][0]
end

# Returns every value that shares the highest occurrence count in +h+.
#
# Array#count is called once per element in both passes, so each pass
# rescans the whole values array for every element.
#
# @param h [Hash] hash whose values are inspected
# @return [Array] the most frequent value(s), without duplicates
def acts_as_geek(h)
  values = h.values
  occurrence_counts = values.map { |value| values.count(value) }
  highest = occurrence_counts.max
  winners = values.select { |value| values.count(value) == highest }
  winners.uniq
end

# Returns one most frequent value of +h+ via a pairwise comparison.
#
# The running winner is replaced whenever the next value occurs at least as
# often, so with a tie the later value is kept. Array#count rescans the
# values array on every comparison.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] a most frequent value, or nil when +h+ is empty
def squiguy(h)
  values = h.values
  values.inject do |leader, challenger|
    if values.count(leader) > values.count(challenger)
      leader
    else
      challenger
    end
  end
end

# Returns the value that occurs most often in +h+.
#
# The values are bucketed, and max_by is run on the bucket hash itself, so
# the result is a [value, occurrences] pair whose first element is returned.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] if +h+ is empty (max_by finds no pair)
def babai1(h)
  buckets = h.values.group_by { |value| value }
  top_pair = buckets.max_by { |_value, occurrences| occurrences.size }
  top_pair.first
end

# Returns the value that occurs most often in +h+.
#
# The [key, value] pairs are grouped by value without building a values
# array first; the value of the largest group is returned.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] if +h+ is empty (max_by finds no group)
def babai2(h)
  pairs_by_value = h.group_by { |_key, value| value }
  top_group = pairs_by_value.max_by { |_value, pairs| pairs.size }
  top_group.first
end


# Times every contender on the same hash and prints a Benchmark.bm table.
#
# @param h [Hash] the hash handed to each contender
# @param n [Integer] how many times each contender is invoked
# @return [Array<Benchmark::Tms>] whatever Benchmark.bm returns
def benchmark(h, n)
  # Report label => name of the method implementing that answer, in the
  # order the rows are printed.
  contenders = {
    "Seanny123"           => :seanny123,
    "Stefan"              => :stefan,
    "Yevgeniy Anfilofyev" => :yevgeniy_anfilofyev,
    "acts_as_geek"        => :acts_as_geek,
    "squiguy"             => :squiguy,
    "Babai1"              => :babai1,
    "Babai2"              => :babai2
  }

  # 20 is the label column width passed to Benchmark.bm.
  Benchmark.bm(20) do |x|
    contenders.each do |label, implementation|
      x.report(label) { n.times { send(implementation, h) } }
    end
  end
end

# Repetitions per contender.
n = 10
# Build 3,000 entries in the order a0, b0, c0, a1, ...; the "a" and "c" keys
# map to "foo" and the "b" keys map to "bar".
h = {}
(0...1000).each do |i|
  [%w[a foo], %w[b bar], %w[c foo]].each do |prefix, word|
    h["#{prefix}#{i}"] = word
  end
end
benchmark(h, n)

结果：

                           user     system      total        real
Seanny123              0.020000   0.000000   0.020000 (  0.015550)
Stefan                 0.040000   0.000000   0.040000 (  0.044666)
Yevgeniy Anfilofyev    0.020000   0.000000   0.020000 (  0.023162)
acts_as_geek          16.160000   0.000000  16.160000 ( 16.223582)
squiguy               15.740000   0.000000  15.740000 ( 15.768917)
Babai1                 0.020000   0.000000   0.020000 (  0.015430)
Babai2                 0.020000   0.000000   0.020000 (  0.025711)

score 1 · Accepted Answer

基准

用一个小哈希：

# Repetitions per contender.
n = 100_000
# The three-entry hash from the question.
h = { "a" => "foo", "b" => "bar", "c" => "foo" }
benchmark(h, n)

结果：

                           user     system      total        real
Seanny123              0.220000   0.000000   0.220000 (  0.222342)
Stefan                 0.260000   0.000000   0.260000 (  0.263583)
Yevgeniy Anfilofyev    0.350000   0.000000   0.350000 (  0.341685)
acts_as_geek           0.300000   0.000000   0.300000 (  0.306601)
squiguy                0.140000   0.000000   0.140000 (  0.139141)
Babai                  0.220000   0.000000   0.220000 (  0.218616)

使用大哈希：

# Repetitions per contender.
n = 10
# Build 3,000 entries in the order a0, b0, c0, a1, ...; the "a" and "c" keys
# map to "foo" and the "b" keys map to "bar".
h = {}
(0...1000).each do |i|
  [%w[a foo], %w[b bar], %w[c foo]].each do |prefix, word|
    h["#{prefix}#{i}"] = word
  end
end
benchmark(h, n)

结果：

                           user     system      total        real
Seanny123              0.060000   0.000000   0.060000 (  0.059068)
Stefan                 0.100000   0.000000   0.100000 (  0.100760)
Yevgeniy Anfilofyev    0.080000   0.000000   0.080000 (  0.080988)
acts_as_geek          97.020000   0.020000  97.040000 ( 97.072220)
squiguy               97.480000   0.020000  97.500000 ( 97.535130)
Babai                  0.050000   0.000000   0.050000 (  0.058653)

基准代码：

require 'benchmark'

# Returns the value that occurs most often in +h+.
#
# Equal values are bucketed together and a member of the largest bucket is
# returned.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] if +h+ is empty (there is no largest bucket)
def seanny123(h)
  buckets = h.values.group_by { |value| value }
  largest = buckets.values.max_by { |bucket| bucket.size }
  largest.first
end

# Returns the value that occurs most often in +h+ by counting occurrences.
#
# @param h [Hash] hash whose values are counted
# @return [Object, nil] the most frequent value, or nil when +h+ is empty
def stefan(h)
  # Hash.new(0) lets every unseen value start counting from zero.
  frequencies = h.each_with_object(Hash.new(0)) do |(_key, value), counts|
    counts[value] += 1
  end
  # max_by returns the [value, count] pair with the highest count.
  most_common, _count = frequencies.max_by { |_value, count| count }
  most_common
end

# Returns the value that occurs most often in +h+.
#
# The [key, value] pairs are grouped by value, the groups are sorted by size
# in ascending order, and the value of the last (largest) group is returned.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] if +h+ is empty (the sorted list has no last entry)
def yevgeniy_anfilofyev(h)
  pairs_by_value = h.group_by { |_key, value| value }
  ranked = pairs_by_value.sort_by { |_value, pairs| pairs.size }
  ranked[-1][0]
end

# Returns every value that shares the highest occurrence count in +h+.
#
# Array#count is called once per element in both passes, so each pass
# rescans the whole values array for every element.
#
# @param h [Hash] hash whose values are inspected
# @return [Array] the most frequent value(s), without duplicates
def acts_as_geek(h)
  values = h.values
  occurrence_counts = values.map { |value| values.count(value) }
  highest = occurrence_counts.max
  winners = values.select { |value| values.count(value) == highest }
  winners.uniq
end

# Returns one most frequent value of +h+ via a pairwise comparison.
#
# The running winner is replaced whenever the next value occurs at least as
# often, so with a tie the later value is kept. Array#count rescans the
# values array on every comparison.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] a most frequent value, or nil when +h+ is empty
def squiguy(h)
  values = h.values
  values.inject do |leader, challenger|
    if values.count(leader) > values.count(challenger)
      leader
    else
      challenger
    end
  end
end

# Returns the value that occurs most often in +h+.
#
# The values are bucketed, and max_by is run on the bucket hash itself, so
# the result is a [value, occurrences] pair whose first element is returned.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the most frequent value
# @raise [NoMethodError] if +h+ is empty (max_by finds no pair)
def babai(h)
  buckets = h.values.group_by { |value| value }
  top_pair = buckets.max_by { |_value, occurrences| occurrences.size }
  top_pair.first
end


# Times every contender on the same hash and prints a Benchmark.bm table.
#
# @param h [Hash] the hash handed to each contender
# @param n [Integer] how many times each contender is invoked
# @return [Array<Benchmark::Tms>] whatever Benchmark.bm returns
def benchmark(h, n)
  # Report label => name of the method implementing that answer, in the
  # order the rows are printed.
  contenders = {
    "Seanny123"           => :seanny123,
    "Stefan"              => :stefan,
    "Yevgeniy Anfilofyev" => :yevgeniy_anfilofyev,
    "acts_as_geek"        => :acts_as_geek,
    "squiguy"             => :squiguy,
    "Babai"               => :babai
  }

  # 20 is the label column width passed to Benchmark.bm.
  Benchmark.bm(20) do |x|
    contenders.each do |label, implementation|
      x.report(label) { n.times { send(implementation, h) } }
    end
  end
end

score 1 · Accepted Answer

您可以用 Enumerable#inject 计算每个值出现的频率：

# Fold over the [key, value] pairs, counting each value; the accumulator
# must be returned from the block on every step.
frequencies = h.inject(Hash.new(0)) do |counts, (_key, value)|
  counts[value] += 1
  counts
end
#=> {"foo"=>2, "bar"=>1}

或者用 Enumerable#each_with_object：

# each_with_object passes the same counter hash to every step and returns it,
# so the block does not need to return the accumulator.
frequencies = h.each_with_object(Hash.new(0)) do |(_key, value), counts|
  counts[value] += 1
end
#=> {"foo"=>2, "bar"=>1}

再用 Enumerable#max_by 取出出现次数最多的一项：

# max_by yields each [value, occurrences] pair; compare by occurrences only
# and destructure the winning pair.
value, count = frequencies.max_by { |_value, occurrences| occurrences }
#=> ["foo", 2]

value
#=> "foo"

score 0 · Accepted Answer

使用 group_by（不经过 values），再配合 sort_by：

# Group the [key, value] pairs by value, sort the groups by size in ascending
# order, then read the value of the last (largest) group.
h.group_by { |_key, value| value }
 .sort_by { |_value, pairs| pairs.size }[-1][0]

score 0 · Accepted Answer

更新：不要使用我的解决方案。我的计算机要耗费大量时钟周期才能算出结果。没想到函数式的写法在这里会这么慢。

用 Enumerable#reduce 怎么样？

h = { "a" => "foo", "b" => "bar", "c" => "foo" }
v = h.values
# Pairwise comparison: keep the running winner only while it occurs strictly
# more often than the next value.
most = v.reduce { |memo, val| v.count(memo) > v.count(val) ? memo : val }
p most

需要注意的是，如果哈希中有两个或更多的值并列拥有相同的“最大”出现次数，这种方法只会返回其中一个。如果您需要得到所有并列最多的值，就不应使用这个解决方案。

这是一个基准。

#!/usr/bin/env ruby

require 'benchmark'

# Number of times each contender is executed inside its report.
MULT = 10
letters = ("a".."z").to_a
# 10,000 randomly chosen lowercase letters.
arr = Array.new(10000) { letters.sample }
# The hash under benchmark must exist before it is filled; without this
# initialisation the first `h[i] = ...` raises NameError.
h = {}
# Integer index => letter, in index order.
arr.each_with_index { |letter, i| h[i] = letter }

# Run every proposed solution MULT times against the same hash `h` and print
# one timing row per answer.
Benchmark.bm do |rep|
  # Bucket the values and take a member of the largest bucket.
  rep.report("Seanny123") do
    MULT.times { h.values.group_by { |e| e }.values.max_by(&:size).first }
  end

  # Pairwise comparison; Array#count rescans the values on every step.
  rep.report("squiguy") do
    MULT.times do
      vals = h.values
      vals.reduce { |memo, val| vals.count(memo) > vals.count(val) ? memo : val }
    end
  end

  # Collect every value sharing the highest count; also built on Array#count.
  rep.report("acts_as_geek") do
    MULT.times do
      vals = h.values
      highest = vals.map { |val| vals.count(val) }.max
      vals.select { |val| vals.count(val) == highest }.uniq
    end
  end

  # Group the pairs by value, sort the groups by size, read the last one.
  rep.report("Yevgeniy Anfilofyev") do
    MULT.times do
      h.group_by { |_key, val| val }
       .sort_by { |_val, pairs| pairs.size }[-1][0]
    end
  end

  # Count occurrences with inject, then pick the highest count.
  rep.report("Stefan") do
    MULT.times do
      frequencies = h.inject(Hash.new(0)) { |counts, (_key, val)| counts[val] += 1; counts }
      frequencies.max_by { |_val, count| count }
    end
  end

  # Bucket the values and run max_by on the bucket hash itself.
  rep.report("Babai") do
    MULT.times { h.values.group_by { |e| e }.max_by { |_val, group| group.size }.first }
  end
end

ruby - 查找最常见的哈希值

6 回答 6

基准

Related

Reference