当我运行我的代码时,我会收到非常奇怪的错误消息:
/Users/Pan/Data/external/filter_url_1008.rb:35:语法错误,意外的keyword_end /Users/Pan/Data/external/filter_url_1008.rb:45:语法错误,意外的输入结束,期待关键字结束 filter_file.close ^
我检查了我的 Ruby 代码几次,但无法找出问题所在。
#This script is for filterring any html files that doesn't abide the rule.
require "fileutils"
#path where html files will be read from
source_dir = "/20131008"
#path where flittered html files will be copy to
dest_dir ="/20131008_filtered"
#file index to be filtered
filter_file = File.open("filtered_index.txt","r")
if !File.exist?(dest_dir)
FileUtils.mkdir_p("/dest_dir")
print(dest_dir + " was created!\n")
end
#filter rule
blacklist = ["facebook.com", "youtube.com", "twitter.com",
"linkedin.com", "bebo.com", "twitlonger.com", "bing.com", "ebay.com",
"ebayrt.com", "maps.google", "waze.com", "foursquare.com", "adf.ly",
"twitpic.com","itunes.apple.com","craigslist.org","instagram.com",
"google.com", "google.co.uk", "google.ie","bullhornreach",
"pinterest.com", "feedsportal","tumblr.com"]
filter = filter_file.read
#Read from 20131008_filtered.txt and exclude urls that's in blacklist
filter.each_line do |line|
$match_count = 0
blacklist.each do |blacklist_atom|
if !(line.downcase.include? "blacklist_atom")
match_count += 1
end
end
if (blacklist.length == match_count)
filename_cp = line[line.index("20131008/") + 9..line.index(".html") - 1]
filename = filename_cp.to_s + ".html"
FileUtils.cp(source_dir + "/" + filename, dest_dir)
end
end
filter_file.close