我刚刚草草拼凑了一个 JSON 到 XML 的转换器:它大致地解析 JSON,并在正则表达式的回调函数中直接把它转换成 XML。这种方法完全是我自己想出来的,所以我不确定它是否存在明显的(甚至严重的)缺点或限制——到目前为止,解析对我来说几乎还是一个谜。
我在代码里写了很多注释,希望它容易理解。
代码
/**
 * Replacer callback for String.prototype.replace: receives one JSON token
 * (as split up by the tokenizer regex) and returns the XML text that should
 * replace it.
 *
 * Exactly one of the token arguments is set per call:
 *   m      - the whole match (unused)
 *   oo     - "{"  open object
 *   co     - "}"  close object
 *   st     - the quote character of a string token
 *   s      - the raw (still backslash-escaped) contents of that string
 *   ignore - inner capture group of the string pattern (unused)
 *   cl     - ":"  colon
 *   cm     - ","  comma
 *   oa     - "["  open array
 *   ca     - "]"  close array
 *
 * The parser state lives in a closure rather than on `this`. A replace
 * callback is invoked without a receiver, so `this` is the global object in
 * sloppy mode (leaking `array`, `last`, `level` and `tags` as globals) and
 * `undefined` in strict mode / ES modules (where `this.array` would throw).
 *
 * The state is shared by every call; after a balanced document has been
 * processed it is back at its starting values, so the callback can be reused.
 *
 * NOTE(review): only arrays whose elements are objects produce <item> tags,
 * and the single `array` flag is cleared by any "]", so nested arrays are not
 * supported. Text and tag names are emitted without XML escaping.
 *
 * Relies on `stripslashes` being defined elsewhere in this file.
 */
var cb = (function () {
    // array: truthy while inside an array; last: type code of the previous
    // token; level: current indentation depth; tags: stack of open tag names.
    var state = { array: null, last: null, level: 0, tags: [] };

    // Left padding for a line at the given depth: four spaces per level
    // beyond the first. Depths of zero or less yield no padding (the previous
    // countdown loop never terminated for them).
    function pad(level) {
        var out = '';
        for (var q = (level - 1) * 4; q > 0; q--) {
            out += ' ';
        }
        return out;
    }

    return function (m, oo, co, st, s, ignore, cl, cm, oa, ca) {
        // this will be built, and returned at the end
        var xml = '';

        if (oo) {
            // An object that directly follows "[" or "," inside an array is
            // an array element: wrap it in <item>.
            if (state.array && /^(cm|oa)$/.test(state.last)) {
                xml += '\n' + pad(state.level) + '<item>';
                state.tags.push('item');
            }
            state.level++;
            state.last = 'oo';
        } else if (co) {
            // A string value keeps its closing tag on the same line; anything
            // else gets the closing tag on its own padded line.
            if (state.last != 'st') {
                xml += pad(state.level);
            }
            xml += '</' + state.tags.pop() + '>\n';
            state.level--;
            state.last = 'co';
        } else if (st) {
            // remove slashes from the captured string
            var text = stripslashes(s);
            if (state.last == 'oo' || state.last == 'cm') {
                // A string after "{" or "," is a key: open a tag named after it.
                state.tags.push(text);
                xml += '\n' + pad(state.level) + '<' + text + '>';
            } else if (state.last == 'cl') {
                // A string after ":" is a value: emit it as text content.
                xml += text;
            }
            state.last = 'st';
        } else if (oa) {
            state.array = 1;
            state.level++;
            xml += '\n' + pad(state.level) + '<array>';
            state.level++;
            state.last = 'oa';
        } else if (ca) {
            state.array = 0;
            // Close the last element's tag, then the array wrapper.
            xml += pad(state.level) + '</' + state.tags.pop() + '>\n';
            state.level--;
            xml += pad(state.level) + '</array>\n';
            state.level--;
            state.last = 'ca';
        } else if (cl) {
            state.last = 'cl';
        } else if (cm) {
            // A comma ends the current key's (or element's) tag; after "}"
            // the closing tag sits on its own padded line.
            if (state.last == 'co') {
                xml += pad(state.level) + '</' + state.tags.pop() + '>';
            } else {
                xml += '</' + state.tags.pop() + '>';
            }
            state.last = 'cm';
        }

        // return the built xml
        return xml;
    };
})();
// DEPENDENCY
/**
 * Removes backslash escaping from a string.
 *
 * Every backslash is dropped and the character that follows it is kept
 * as-is, with one exception: "\0" becomes a NUL character. A backslash with
 * nothing matchable after it (end of input, or before a line break) is
 * simply removed.
 *
 * @param {*} str - value to unescape; coerced to a string first
 * @returns {string} the unescaped text
 */
function stripslashes (str) {
    var text = '' + str;
    return text.replace(/\\(.?)/g, function (whole, escaped) {
        // "\0" is the only escape that maps to a different character.
        if (escaped === '0') {
            return '\u0000';
        }
        // "\\" -> "\", "\" + nothing -> "", "\x" -> "x"
        return escaped;
    });
}
// DATA FOR TESTING
// Sample transaction-like record used to exercise the converter: scalar
// fields, nested objects, two arrays of objects, and (in the last "inputs"
// entry) a key and a value that contain quote characters.
var o = {
    "hash": "b6f6991d03df0e2e04dafffcd6bc418aac66049e2cd74b80f14ac86db1e3f0da",
    "ver": 1,
    "vin_sz": 1,
    "vout_sz": 2,
    "lock_time": "Unavailable",
    "size": 258,
    "relayed_by": "64.179.201.80",
    "block_height": 12200,
    "tx_index": "12563028",
    "inputs": [
        {
            "prev_out": {
                "hash": "a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
                "value": "100000000",
                "tx_index": "12554260",
                "n": "2"
            },
            "scriptSig": "Unavailable"
        },
        {
            "prev_out": {
                "hash": "a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
                "value": "100000000",
                "tx_index": "12554260",
                "n": "2"
            },
            "scriptSig": "Unavailable"
        },
        {
            "prev_out": {
                "hash": "a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
                "value": "100000000",
                "tx_index": "12554260",
                "n": "2"
            },
            "scriptSig": "Unavailable"
        },
        {
            // Quotes inside both the key and the value test string escaping.
            "prev_out": {
                "hash": "a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
                "va'l\"u'e": "1000\"00000",
                "tx_index": "12554260",
                "n": "2"
            },
            "scriptSig": "Unavailable"
        }
    ],
    "out": [
        {
            "value": "98000000",
            "hash": "29d6a3540acfa0a950bef2bfdc75cd51c24390fd",
            "scriptPubKey": "Unavailable"
        },
        {
            "value": "2000000",
            "hash": "17b5038a413f5c5ee288caa64cfab35a0c01914e",
            "scriptPubKey": "Unavailable"
        }
    ]
}
// Serialise the fixture and print it so the XML below can be compared with it.
var s = JSON.stringify(o)
console.log("Original: "+s+"\n")
// THE REGEX
// capturing groups map to -> m,oo,co,st,s,ignore,cl,cm,oa,ca in the callback
// meaning match, open-object, close-object, string-type, string, ignore, colon, comma, open-array, close-array
//
// String tokens are matched as: opening quote, then any run of either
//   - a backslash followed by any one character (an escape pair), or
//   - a character that is neither a backslash nor the opening quote,
// then the same quote again.
// The earlier pattern `((\\\3|[^\3])+?)` had three problems:
//   - inside a character class `\3` is an octal escape (U+0003), not a
//     backreference, so `[^\3]` also accepted quotes and backslashes;
//   - a string ending in an escaped backslash ("a\\") ran past its closing quote;
//   - `+?` required at least one character, so "" was not recognised.
// The group layout (nine groups) is unchanged, so the callback needs no changes.
var r = /({)|(})|(['"])((\\[\s\S]|(?!\3)[^\\])*?)\3|(:)|(,)|(\[)|(\])/g
// Every matched token is replaced by what cb returns; text the regex does not
// match (e.g. numbers) stays in place.
var xml = s.replace(r,cb)
console.log(xml)
问题
- 如何添加对嵌套数组的支持?
- 这种解析技术叫什么?
- 字符串转义方法是否可靠?
- 这是一种可行的解析方法吗?
- 使用这种方法有更通用的解决方案吗?