0

我有这个扩展 Ruby Array 类的代码:

# Extendes the Array class to have methods giving the same RSpec functionality of
# checking if Array elements equals to another ones'.
class Array
  def self.same_elements?(actual, expected)
    extra_items   = difference_between_arrays(actual, expected)
    missing_items = difference_between_arrays(expected, actual)
    extra_items.empty? & missing_items.empty?
  end

  def has_same_elements?(expected)
    extra_items   = self.class.difference_between_arrays(self, expected)
    missing_items = self.class.difference_between_arrays(expected, self)
    extra_items.empty? & missing_items.empty?
  end

  protected
  def self.difference_between_arrays(array_1, array_2)
    difference = array_1.dup
    array_2.each do |element|
      if (index = difference.index(element))
        difference.delete_at(index)
      end
    end
    difference
  end
end

这是规范:

describe Array do
  before(:each) do
    @a = [1,2,3]
    @b = [3,2,1]
    @c = [1,2,3,4]
    @d = [4,1,3]
    @e = [1,2,3]
  end
  it "should respond to .same_elements?" do
    Array.should respond_to(:same_elements?)
  end
  it "should respond to #has_same_elements?" do
    Array.new.should respond_to(:has_same_elements?)
  end

  describe ".same_elements?" do
    it "should return correct values" do
      Array.same_elements?(@a,@a).should eq(true)
      Array.same_elements?(@a,@b).should eq(true)
      Array.same_elements?(@a,@c).should eq(false)
      Array.same_elements?(@a,@d).should eq(false)
      Array.same_elements?(@a,@e).should eq(true)

      Array.same_elements?(@b,@a).should eq(true)
      Array.same_elements?(@b,@b).should eq(true)
      Array.same_elements?(@b,@c).should eq(false)
      Array.same_elements?(@b,@d).should eq(false)
      Array.same_elements?(@b,@e).should eq(true)

      Array.same_elements?(@c,@a).should eq(false)
      Array.same_elements?(@c,@b).should eq(false)
      Array.same_elements?(@c,@c).should eq(true)
      Array.same_elements?(@c,@d).should eq(false)
      Array.same_elements?(@c,@e).should eq(false)

      Array.same_elements?(@d,@a).should eq(false)
      Array.same_elements?(@d,@b).should eq(false)
      Array.same_elements?(@d,@c).should eq(false)
      Array.same_elements?(@d,@d).should eq(true)
      Array.same_elements?(@d,@e).should eq(false)

      Array.same_elements?(@e,@a).should eq(true)
      Array.same_elements?(@e,@b).should eq(true)
      Array.same_elements?(@e,@c).should eq(false)
      Array.same_elements?(@e,@d).should eq(false)
      Array.same_elements?(@e,@e).should eq(true)
    end
  end

  describe "#has_same_elements?" do
    it "should return correct values" do
      @a.has_same_elements?(@a).should eq(true)
      @a.has_same_elements?(@b).should eq(true)
      @a.has_same_elements?(@c).should eq(false)
      @a.has_same_elements?(@d).should eq(false)
      @a.has_same_elements?(@e).should eq(true)

      @b.has_same_elements?(@a).should eq(true)
      @b.has_same_elements?(@b).should eq(true)
      @b.has_same_elements?(@c).should eq(false)
      @b.has_same_elements?(@d).should eq(false)
      @b.has_same_elements?(@e).should eq(true)

      @c.has_same_elements?(@a).should eq(false)
      @c.has_same_elements?(@b).should eq(false)
      @c.has_same_elements?(@c).should eq(true)
      @c.has_same_elements?(@d).should eq(false)
      @c.has_same_elements?(@e).should eq(false)

      @d.has_same_elements?(@a).should eq(false)
      @d.has_same_elements?(@b).should eq(false)
      @d.has_same_elements?(@c).should eq(false)
      @d.has_same_elements?(@d).should eq(true)
      @d.has_same_elements?(@e).should eq(false)

      @e.has_same_elements?(@a).should eq(true)
      @e.has_same_elements?(@b).should eq(true)
      @e.has_same_elements?(@c).should eq(false)
      @e.has_same_elements?(@d).should eq(false)
      @e.has_same_elements?(@e).should eq(true)
    end
  end
end

然而,这段代码显然对于大型数组来说变得非常慢。

出于性能原因,是否值得将这种方法移植到 C 中?你会如何解决这个问题?鉴于我正在使用 Ruby 2.0.0-p0,您能否推荐任何(最近的)文章来构建此类功能?

更新

两个提议的解决方案之间的基准:

                                           user     system      total        real
(sort) instance method arr_1 vs arr_2      1.910000   0.030000   1.940000 (  1.935651)
(set) instance method arr_1 vs arr_2       7.010000   0.360000   7.370000 (  7.377319)
(sort) class method arr_1 vs arr_2         1.920000   0.030000   1.950000 (  1.952080)
(set) class method arr_1 vs arr_2          6.610000   0.320000   6.930000 (  6.919273)
(sort) instance method arr_1 vs arr_3      2.520000   0.090000   2.610000 (  2.620047)
(set) instance method arr_1 vs arr_3       7.620000   0.330000   7.950000 (  7.951402)
(sort) class method arr_1 vs arr_3         1.920000   0.030000   1.950000 (  1.943820)
(set) class method arr_1 vs arr_3          8.130000   0.390000   8.520000 (  8.523959)

快速bm代码:

require 'benchmark'
require 'set'

class Array
  def self.same_elements_with_sort?(actual, expected)
    actual.has_same_elements_with_sort?(expected)
  end

  def has_same_elements_with_sort?(expected)
    self.sort == expected.sort
  end

  # -----

  def self.same_elements?(actual, expected)
    actual.to_set == expected.to_set
  end

  def has_same_elements?(expected)
    Array.same_elements?(self, expected)
  end
end

Benchmark.bm(40) do|b|
  arr_1 = (1..8000000).to_a.sample(40000)
  arr_2 = (1..8000000).to_a.sample(40000)
  arr_3 = arr_1

  b.report('(sort) instance method arr_1 vs arr_2') do
    150.times { arr_1.has_same_elements_with_sort?(arr_2) }
  end

  b.report('(set) instance method arr_1 vs arr_2') do
    150.times { arr_1.has_same_elements?(arr_2) }
  end

  b.report('(sort) class method arr_1 vs arr_2') do
    150.times { Array.same_elements_with_sort?(arr_1, arr_2) }
  end

  b.report('(set) class method arr_1 vs arr_2') do
    150.times { Array.same_elements?(arr_1, arr_2) }
  end

  b.report('(sort) instance method arr_1 vs arr_3') do
    150.times { arr_1.has_same_elements_with_sort?(arr_3) }
  end

  b.report('(set) instance method arr_1 vs arr_3') do
    150.times { arr_1.has_same_elements?(arr_3) }
  end

  b.report('(sort) class method arr_1 vs arr_3') do
    150.times { Array.same_elements_with_sort?(arr_1, arr_3) }
  end

  b.report('(set) class method arr_1 vs arr_3') do
    150.times { Array.same_elements?(arr_1, arr_3) }
  end
end
4

2 回答 2

1

这可能更快(并且更容易阅读):

require 'set'

class Array
  def self.same_elements?(actual, expected)
    actual.to_set == expected.to_set
  end

  def has_same_elements?(expected)
    Array.same_elements?(self, expected)
  end
end

在您的测试中,它们不包括这种情况:

 [1,2,3].has_same_elements?([1,1,2,3])

答案应该是true,而您的实现返回false

于 2013-05-08T15:09:33.227 回答
1

您的代码来自 RSpec 的MatchArray. RSpec 计算extra_itemsmissing_items提供有意义的错误消息。

如果您不需要该信息,则可以对数组进行排序:

class Array
  def self.same_elements?(actual, expected)
    actual.has_same_elements?(expected)
  end

  def has_same_elements?(expected)
    self.sort == expected.sort
  end
end
于 2013-05-08T15:17:39.747 回答