我有一个 CSV 文件,它的格式有点像这样:
name subname value1 value2
a a 1 21
a a 2 22
a a 3 23
a a 4 24
b a 5 25
b a 6 26
b a 7 27
b a 8 28
c c 9 29
c c 10 30
c c 11 31
c c 12 32
....
etc
使用一个简单的 CSV 转 JSON 脚本,我已经能把每一行输出为一个有效的 JSON 条目,但这样的结果非常冗余,因为其中有大量重复的值。
我想读取这个文件,并把它输出为如下所示的格式:
[
{
"name":"a",
"subname":"a",
"data": {
"attr1":{"name":"value1", "values":[1,2,3,4]},
"attr2":{"name":"value2", "values":[21,22,23,24]}
}
},
{
"name":"b",
"subname":"a",
"data": {
"attr1":{"name":"value1", "values":[5,6,7,8]},
"attr2":{"name":"value2", "values":[25,26,27,28]}
}
},
{
"name":"c",
"subname":"c",
"data": {
"attr1":{"name":"value1", "values":[9,10,11,12]},
"attr2":{"name":"value2", "values":[29,30,31,32]}
}
},
....
etc
]
我知道脚本应该像这样工作:
loop until no more rows:
skip row 1
for the next 4 rows
{
"name":row 1, column 1 ,
"subname":row 1, column 2 ,
"data": {
"attr1":{"name":"value1", "values":[row 1 to 4, column 3]},
"attr2":{"name":"value2", "values":[row 1 to 4, column 4]}
}
}
对于这个特定的数据集,这种模式始终成立(不过实际数据的行数和列数都更多)。我知道自己想要什么样的输出,但不确定该如何实现。
我该如何用 Python 做到这一点?非常感谢任何建议和解决方案。
编辑:下面是直接用 JavaScript(借助 underscore.js)实现的解决方案:
// Builds `tables`: one record per distinct (name, subname) pair, where every
// value column is collected into data["attrN"] = {name: <header>, values: [...]}.
// Depends on underscore.js (`_`) and on `this.get(...)` from the enclosing context.
var headers = this.get('headers')
// `grid` is read column-wise below (grid[0] is used as the list of names), so
// transposing it yields one array per CSV row.
// NOTE(review): transpose() is not a built-in Array method; presumably it is
// provided by the grid object or a prototype extension -- confirm.
var grid = this.get('grid')
var transposed = grid.transpose()
var tables = [];
var rows = []
// Declared with `var` so that `keys` does not leak as an implicit global.
var keys = ["name", "subname"]
// Every column after the first two (name, subname) is a value column.
var numberOfEntries = grid.length - 2;
_(numberOfEntries).times(function(n) { keys.push("attr" + (n + 1)) })

// Turn each row array into an object keyed by name / subname / attr1..attrN.
_.each(transposed, function(row) {
  rows.push(_.object(keys, row))
})

// Walk the rows grouped by unique name so that output order follows the order
// in which names first appear in the first column.
var names = _.uniq(grid[0])
_.each(names, function(name) {
  var entries = _.where(rows, {name: name})
  _.each(entries, function(entry) {
    // Reuse the record for this (name, subname) pair if one was already created.
    var exists = _.where(tables, {name: entry.name, subname: entry.subname})
    var obj;
    if (exists.length > 0) {
      obj = exists[0]
    } else {
      obj = {name: entry.name, subname: entry.subname, data: {}}
      tables.push(obj)
    }
    _(numberOfEntries).times(function(n) {
      var key = "attr" + (n + 1);
      var attr = obj.data[key];
      if (!attr) {
        // assumes headers is index-aligned with the grid columns, so the
        // first value column's header sits at index 2 -- TODO confirm
        attr = obj.data[key] = {"name": headers[n + 2], "values": []};
      }
      // Always record the value; previously the push only happened when the
      // bucket already existed, so the first value of each group was lost.
      attr.values.push(entry[key])
    })
  })
})