0

我有一些这样的代码:

require 'nokogiri'

class Parser
  # Parses an HTML source and merges the output of every section parser
  # into a single hash.
  #
  # The parsed document is stored in @data, which the individual parse_*
  # helpers read. Their hashes are merged in the order listed below, so a key
  # produced by a later helper overwrites the same key from an earlier one.
  #
  # @param html [String, File] anything Kernel#open accepts (e.g. a file path)
  # @return [Hash] the combined result of all section parsers
  def self.parse(html)
    # The block form guarantees the handle is closed once the document has
    # been built, instead of leaving it open until garbage collection.
    @data = open(html) { |io| Nokogiri.HTML(io) }

    [
      parse_department,
      parse_super_saver,
      parse_new_arrivals,
      parse_out_of_stock,
      parse_categories,
      parse_results,
      parse_category
    ].inject({}, :update)
  end

  ## Categories

  (etc...)  

  # Extracts the total result count from a top-level "Pet Supplies" page.
  #
  # Reads the document stored in @data. A count is reported only when the
  # first '#refinements ul' list exists and the span in its first item reads
  # "Pet Supplies". The returned hash is also kept in @results_hash.
  #
  # @return [Hash] `{ results: Integer }` when a count could be read,
  #   otherwise `{}`
  def self.parse_results
    @results_hash = {}

    refinements = @data.css('#refinements ul').first
    return @results_hash unless refinements
    return @results_hash unless refinements.css('li:nth-child(1) a span').text == "Pet Supplies"

    # The counter node, or the "N Results" wording inside it, may be absent;
    # in that case report no count instead of raising NoMethodError on nil.
    counter = @data.at_css('#resultCount span')
    count = counter && counter.text[/(\S+) Results$/i, 1]
    @results_hash[:results] = count.delete(",").to_i if count

    @results_hash
  end

  ## Hot Lists

  # Extracts the hot-list category title from the document stored in @data.
  #
  # Looks up the first node matching '#zg_listTitle span'. The returned hash
  # is also kept in @category_hash.
  #
  # @return [Hash] `{ category: String }` when the node exists, otherwise `{}`
  def self.parse_category
    @category_hash = {}

    title = @data.at_css('#zg_listTitle span')
    # Use the node found above rather than querying the document a second time.
    @category_hash[:category] = title.text if title

    @category_hash
  end
end

这工作正常:

    results = @data.css('#refinements ul').first
    unless results
      @results_hash = {}
      return @results_hash
    end

当没有 #refinements ul 元素时,代码停止并返回一个空哈希。

但在这种情况下

    category = @data.at_css('#zg_listTitle span')
    unless category
      @category_hash = {}
      return @category_hash
    end

即使没有任何#zg_listTitle span元素,代码似乎仍在继续。

可能是什么问题呢?

编辑:

规格:

require File.dirname(__FILE__) + '/parser.rb'

# Opens the fixture at parse_categories/amazon_pet_supplies.html for reading
# and returns the open File; the caller owns the handle.
def html_pet_supplies
  File.new("parse_categories/amazon_pet_supplies.html")
end

# Opens the fixture at parse_categories/amazon_dog_supplies.html for reading
# and returns the open File; the caller owns the handle.
def html_dog_supplies
  File.new("parse_categories/amazon_dog_supplies.html")
end

# Opens the fixture at parse_categories/amazon_bird_supplies.html for reading
# and returns the open File; the caller owns the handle.
def html_bird_supplies
  File.new("parse_categories/amazon_bird_supplies.html")
end

# Opens the fixture at parse_hotlists/amazon_baby.html for reading and
# returns the open File; the caller owns the handle.
def html_baby
  File.new("parse_hotlists/amazon_baby.html")
end

(etc.)

# Dog-supplies fixture: the merged hash is expected to carry the result
# count under :results.
describe("Results (Dogs)") do
  let(:results_hash) { Parser.parse(html_dog_supplies) }

  it("should return correct hash") { expect(results_hash[:results]).to eq(514_265) }
end


## Hot Lists

# Baby hot-list fixture: the merged hash is expected to carry the list
# title under :category.
describe("Category") do
  let(:category_hash) { Parser.parse(html_baby) }

  it("should return correct hash") { expect(category_hash[:category]).to eq("Baby") }
end
4

1 回答 1

1

如果#zg_listTitle span未找到则@data.at_css('#zg_listTitle span')返回nil。所以我认为你需要使用if而不是unless.

category = @data.at_css('#zg_listTitle span')
if category
  @category_hash = {}
  return @category_hash
end
于 2013-08-22T10:21:20.320 回答