我了解您希望读取 CSV 文件,对数据执行一些操作,然后将结果写入新的 CSV 文件。您可以按如下方式进行。
代码
require 'csv'
# Collapses the rows of a CSV file that share the same value in one column,
# joining the values of another column with "/".
#
# For every distinct value of +group_field+ a single output row is produced.
# The +aggregate_field+ column holds the group's values joined by "/" (in
# input order); every other column takes its value from the group's first row.
#
# @param csv_file_in [String] path of the CSV file to read (must have a header row)
# @param csv_file_out [String] path of the CSV file to write
# @param group_field [String] header of the column that rows are grouped by
# @param aggregate_field [String] header of the column whose values are joined
# @return [Array<Array>] the merged rows that were written (header row excluded)
def convert(csv_file_in, csv_file_out, group_field, aggregate_field)
  # Use the path that was passed in, not a global constant.
  table = CSV.read(csv_file_in, headers: true)
  headers = table.headers

  merged_rows = table.group_by { |row| row[group_field] }.map do |_key, rows|
    headers.map do |header|
      if header == aggregate_field
        rows.map { |row| row[aggregate_field] }.join('/')
      else
        # Non-aggregated columns are assumed identical within a group,
        # so the first row's value is used.
        rows.first[header]
      end
    end
  end

  CSV.open(csv_file_out, "wb") do |out|
    out << headers
    merged_rows.each { |row| out << row }
  end
end
例子
让我们使用以下数据创建一个 CSV 文件:
s =<<_
SKU,Title,Category,Price
001,Soap,Bathroom,0.5
001,Soap,Kitchen,0.5
002,Water,Kitchen,0.4
002,Water,Garage,0.4
003,Juice,Kitchen,0.8
_
FNameIn = 'testin.csv'
FNameOut = 'testout.csv'
IO.write(FNameIn, s)
#=> 135
现在使用这些值执行该方法:
convert(FNameIn, FNameOut, "SKU", "Category")
并确认 FNameOut 写入正确:
puts IO.read(FNameOut)
SKU,Title,Category,Price
001,Soap,Bathroom/Kitchen,0.5
002,Water,Kitchen/Garage,0.4
003,Juice,Kitchen,0.8
解释
步骤如下:
# Bind the method's parameters to the example values so each step can be run on its own.
csv_file_in = FNameIn
csv_file_out = FNameOut
group_field = "SKU"
aggregate_field = "Category"
# Read through the parameter rather than the constant, as the method body should.
csv = CSV.read(csv_file_in, headers: true)
请参阅CSV::read。
headers = csv.headers
#=> ["SKU", "Title", "Category", "Price"]
h = csv.group_by { |row| row[group_field] }
#=> {"001"=>[
#<CSV::Row "SKU":"001" "Title":"Soap" "Category":"Bathroom" "Price":"0.5">,
# #<CSV::Row "SKU":"001" "Title":"Soap" "Category":"Kitchen" "Price":"0.5">
# ],
# "002"=>[
# #<CSV::Row "SKU":"002" "Title":"Water" "Category":"Kitchen" "Price":"0.4">,
# #<CSV::Row "SKU":"002" "Title":"Water" "Category":"Garage" "Price":"0.4">
# ],
# "003"=>[
# #<CSV::Row "SKU":"003" "Title":"Juice" "Category":"Kitchen" "Price":"0.8">
# ]
# }
arr = h.map do |_,a|
headers.map { |h| h==aggregate_field ?
(a.map { |row| row[aggregate_field] }.join('/')) : a.first[h] }
end
#=> [["001", "Soap", "Bathroom/Kitchen", "0.5"],
# ["002", "Water", "Kitchen/Garage", "0.4"],
# ["003", "Juice", "Kitchen", "0.8"]]
请参阅CSV#headers和Enumerable#group_by,这是一种常用的方法。最后,编写输出文件:
# Write the header row followed by one merged row per group to the output path.
# The block parameter is named `out` so it does not shadow the `csv` table read earlier.
CSV.open(csv_file_out, "wb") do |out|
  out << headers
  arr.each { |row| out << row }
end
请参阅CSV::open。现在让我们回到 arr 的计算。这很容易通过插入一些 puts 语句并执行代码来解释。
arr = h.map do |_,a|
puts " _=#{_}"
puts " a=#{a}"
headers.map do |h|
puts " header=#{h}"
if h==aggregate_field
a.map { |row| row[aggregate_field] }.join('/')
else
a.first[h]
end.
tap { |s| puts " mapped to #{s}" }
end
end
请参阅Object#tap。显示以下内容。
_=001
a=[#<CSV::Row "SKU":"001" "Title":"Soap" "Category":"Bathroom" "Price":"0.5">,
#<CSV::Row "SKU":"001" "Title":"Soap" "Category":"Kitchen" "Price":"0.5">]
header=SKU
mapped to 001
header=Title
mapped to Soap
header=Category
mapped to Bathroom/Kitchen
header=Price
mapped to 0.5
_=002
a=[#<CSV::Row "SKU":"002" "Title":"Water" "Category":"Kitchen" "Price":"0.4">,
#<CSV::Row "SKU":"002" "Title":"Water" "Category":"Garage" "Price":"0.4">]
header=SKU
mapped to 002
header=Title
mapped to Water
header=Category
mapped to Kitchen/Garage
header=Price
mapped to 0.4
_=003
a=[#<CSV::Row "SKU":"003" "Title":"Juice" "Category":"Kitchen" "Price":"0.8">]
header=SKU
mapped to 003
header=Title
mapped to Juice
header=Category
mapped to Kitchen
header=Price
mapped to 0.8