这是上一个问题的延续:我需要在 Parse.com 的 Cloud Code 上用 JavaScript 将 XML 转换为 JSON。
请不要仅仅因为您认为正则表达式(RegEx)不是正确的选择就对此投反对票。这是我不得不采用的方式。如果您有其他想法,请告诉我,但它必须能在 Parse.com 的 Cloud Code 上运行。
原始 XML:
<?xml version="1.0" encoding="UTF-8" ?><api><products total-matched="1618" records-returned="1" page-number="1"><product><ad-id>1234</ad-id><supplier-name>Window World</supplier-name><supplier-category>3703703</supplier-category><buy-url>http://website.com</buy-url><currency>USD</currency><description>Window</description><image-url>http://website.com/windowa/80x80.jpg</image-url><in-stock>yes</in-stock><manufacturer-name>Window World</manufacturer-name><name>Half Pain Glass</name><price>31.95</price><retail-price>87.60</retail-price><sale-price>29.95</sale-price><sku>5938</sku><upc></upc></product><product><ad-id>1234</ad-id><supplier-name>Window World</supplier-name><supplier-category>3703703</supplier-category><buy-url>http://website.com</buy-url><currency>USD</currency><description>Window</description><image-url>http://website.com/windowa/80x80.jpg</image-url><in-stock>yes</in-stock><manufacturer-name>Window World</manufacturer-name><name>Half Pain Glass</name><price>31.95</price><retail-price>87.60</retail-price><sale-price>29.95</sale-price><sku>5938</sku><upc></upc></product><product><ad-id>1234</ad-id><supplier-name>Window World</supplier-name><supplier-category>3703703</supplier-category><buy-url>http://website.com</buy-url><currency>USD</currency><description>Window</description><image-url>http://website.com/windowa/80x80.jpg</image-url><in-stock>yes</in-stock><manufacturer-name>Window World</manufacturer-name><name>Half Pain Glass</name><price>31.95</price><retail-price>87.60</retail-price><sale-price>29.95</sale-price><sku>5938</sku><upc></upc></product><product><ad-id>1234</ad-id><supplier-name>Window World</supplier-name><supplier-category>3703703</supplier-category><buy-url>http://website.com</buy-url><currency>USD</currency><description>Window</description><image-url>http://website.com/windowa/80x80.jpg</image-url><in-stock>yes</in-stock><manufacturer-name>Window World</manufacturer-name><name>Half Pain 
Glass</name><price>31.95</price><retail-price>87.60</retail-price><sale-price>29.95</sale-price><sku>5938</sku><upc></upc></product></products></api>
正则表达式代码:
// Regex-only conversion of an XML string into JSON-like text.
// Expects a string variable `xml` to exist already; the converted text is
// written back into `xml`. The steps below are order-dependent.
//
// Notes that follow from the patterns themselves:
//   * Tag/attribute names are matched with `\w[\w-]+`, i.e. one word character
//     followed by at least one more, so single-character names never match.
//   * Repeated sibling elements are emitted as repeated keys, not as an array.
//
// Matches an opening tag whose LAST attribute is immediately followed by `>`:
//   $1 = `<tag` plus any earlier attributes, $2 = attribute name,
//   $3 = attribute value (must be non-empty because of `[^"]+`).
var regex = /(<\w+[^<]*?)\s+([\w-]+)="([^"]+)">/;
// The regex has no `g` flag, so each iteration rewrites one attribute; loop
// until no tag has attributes left. The attribute is re-emitted as
// `<name>value</name>` placed IN FRONT of its owning tag, so it ends up as a
// preceding sibling of that element rather than as one of its children.
while(xml.match(regex)) xml = xml.replace(regex, '<$2>$3</$2>$1>'); // For attributes
xml = xml.replace(/\s/g, ' '). // Turns every whitespace character (newline, tab, ...) into a space; runs are not collapsed
replace(/< *\?[^>]*?\? *>/g, ''). // Removes `<? ... ?>` constructs such as the XML header
replace(/< *!--[^>]*?-- *>/g, ''). // Removes `<!-- ... -->` comments whose body contains no `>`
// Namespace prefix in opening/closing tag names: `<ns:tag` -> `<ns_tag`, `</ns:tag` -> `</ns_tag`.
replace(/< *(\/?) *(\w[\w-]+\b):(\w[\w-]+\b)/g, '<$1$2_$3').
// Self-closing tags: `<tag attrs/>` -> `< tag attrs>` (note the space after `<`; the slash is dropped).
replace(/< *(\w[\w-]+\b)([^>]*?)\/ *>/g, '< $1$2>').
// Namespace prefix in attribute names: `ns:attr="v"` -> `ns_attr="v"`.
replace(/(\w[\w-]+\b):(\w[\w-]+\b) *= *"([^>]*?)"/g, '$1_$2="$3"').
// Element that still carries attributes AND has content: fold the content into an
// extra `value="..."` attribute and drop the closing tag, producing `< tag attrs value="content">`.
replace(/< *(\w[\w-]+\b)((?: *\w[\w-]+ *= *" *[^"]*?")+ *)>( *[^< ]*?\b.*?)< *\/ *\1 *>/g, '< $1$2 value="$3">').
// Disabled step, kept from the original:
//replace(/ *(\w[\w-]+\b) *= *"([^>]*?)" */g, '< $1>$2').
// Rewrites `<name` directly followed by another `<` into `<name>< `.
// NOTE(review): presumably repairs tags left unterminated by earlier steps - confirm.
replace(/< *(\w[\w-]+\b) *</g, '<$1>< ').
// Collapses a doubled `> >` into a single `>`.
replace(/> *>/g, '>').
// Disabled step, kept from the original:
//replace(/< *\/ *(\w[\w-]+\b) *> *< *\1 *>/g, ''). // breaks the output?
// Escape the document's own double quotes BEFORE any JSON quotes are generated below,
// so that only quotes coming from the input get a backslash.
replace(/"/g, '\\"').
// Leaf elements: `<tag>text</tag>` (content without `<` or `>`) -> `"tag":"text",`.
replace(/< *(\w[\w-]+\b) *>([^<>]*?)< *\/ *\1 *>/g, '"$1":"$2",').
// Same pattern, second pass: elements whose content is now free of tags
// (because their children were just converted) -> `"tag":{content},`.
replace(/< *(\w[\w-]+\b) *>([^<>]*?)< *\/ *\1 *>/g, '"$1":{$2},').
// NOTE(review): the next steps (through the `< /name},{name>` replacement) look like an
// attempt at array handling - confirm. They only fire when the literal text
// `< /name},{` occurs later in the string.
// Opening tag followed somewhere later by `< /name},{` -> `"name":[{`.
replace(/< *(\w[\w-]+\b) *>(?=.*?< \/\1\},\{)/g, '"$1":[{').
// Reverse the order of the `},{`-separated segments so that the lookahead in the
// next replace can "look ahead" at text that originally came BEFORE the match.
split(/\},\{/).
reverse().
join('},{').
// Closing tag that is (in the reversed text) followed by `"name":[{` -> `}],`.
replace(/< *\/ *(\w[\w-]+\b) *>(?=.*?"\1":\[\{)/g, '}],').
// Restore the original segment order.
split(/\},\{/).
reverse().
join('},{').
// A `< /name},{name>` boundary between two same-named items -> plain `},{`.
replace(/< \/(\w[\w-]+\b)\},\{\1>/g, '},{').
// Any opening tag still left (attributes, if any, are discarded) -> `"name":{`.
replace(/< *(\w[\w-]+\b)[^>]*?>/g, '"$1":{').
// Any closing tag still left -> `},`.
replace(/< *\/ *\w[\w-]+ *>/g,'},').
// Trailing-comma cleanup: drop a `,` that follows `}`, `]` or `"` when the next
// non-space character is a closing `}` or `]`.
replace(/\} *,(?= *(\}|\]))/g, '}').
replace(/] *,(?= *(\}|\]))/g, ']').
replace(/" *,(?= *(\}|\]))/g, '"').
// Drop a trailing comma (and surrounding spaces) at the very end of the string.
replace(/ *, *$/g, '');
输出:
"api": {
"page-number": "1",
"records-returned": "1",
"total-matched": "1618",
"products": {
"product": {
"ad-id": "1234",
"supplier-name": "Window World",
"supplier-category": "3703703",
"buy-url": "http://website.com",
"currency": "USD",
"description": "Window",
"image-url": "http://website.com/windowa/80x80.jpg",
"in-stock": "yes",
"manufacturer-name": "Window World",
"name": "Half Pain Glass",
"price": "31.95",
"retail-price": "87.60",
"sale-price": "29.95",
"sku": "5938",
"upc": ""
},
"product": {
"ad-id": "1234",
"supplier-name": "Window World",
"supplier-category": "3703703",
"buy-url": "http://website.com",
"currency": "USD",
"description": "Window",
"image-url": "http://website.com/windowa/80x80.jpg",
"in-stock": "yes",
"manufacturer-name": "Window World",
"name": "Half Pain Glass",
"price": "31.95",
"retail-price": "87.60",
"sale-price": "29.95",
"sku": "5938",
"upc": ""
},
"product": {
"ad-id": "1234",
"supplier-name": "Window World",
"supplier-category": "3703703",
"buy-url": "http://website.com",
"currency": "USD",
"description": "Window",
"image-url": "http://website.com/windowa/80x80.jpg",
"in-stock": "yes",
"manufacturer-name": "Window World",
"name": "Half Pain Glass",
"price": "31.95",
"retail-price": "87.60",
"sale-price": "29.95",
"sku": "5938",
"upc": ""
},
"product": {
"ad-id": "1234",
"supplier-name": "Window World",
"supplier-category": "3703703",
"buy-url": "http://website.com",
"currency": "USD",
"description": "Window",
"image-url": "http://website.com/windowa/80x80.jpg",
"in-stock": "yes",
"manufacturer-name": "Window World",
"name": "Half Pain Glass",
"price": "31.95",
"retail-price": "87.60",
"sale-price": "29.95",
"sku": "5938",
"upc": ""
}
}
}
据我所知,剩下的最后一个问题是:重复出现的元素(例如上面的多个 product)不会被转换成 JSON 数组,而是输出为重复的键。关于如何解决这个问题,您有什么想法吗?