2

这是上一个问题的延续:我需要在 Parse.com 的 Cloud Code 上用 JavaScript 将 XML 转换为 JSON。

请不要仅仅因为您认为正则表达式(RegEx)不是正确的选择就对此投反对票。这是我不得不面对的限制。如果您对此有其他想法,请告诉我,但它必须能在 Parse.com 的 Cloud Code 上运行。

原始 XML:

<?xml version="1.0" encoding="UTF-8" ?><api><products total-matched="1618" records-returned="1" page-number="1"><product><ad-id>1234</ad-id><supplier-name>Window World</supplier-name><supplier-category>3703703</supplier-category><buy-url>http://website.com</buy-url><currency>USD</currency><description>Window</description><image-url>http://website.com/windowa/80x80.jpg</image-url><in-stock>yes</in-stock><manufacturer-name>Window World</manufacturer-name><name>Half Pain Glass</name><price>31.95</price><retail-price>87.60</retail-price><sale-price>29.95</sale-price><sku>5938</sku><upc></upc></product><product><ad-id>1234</ad-id><supplier-name>Window World</supplier-name><supplier-category>3703703</supplier-category><buy-url>http://website.com</buy-url><currency>USD</currency><description>Window</description><image-url>http://website.com/windowa/80x80.jpg</image-url><in-stock>yes</in-stock><manufacturer-name>Window World</manufacturer-name><name>Half Pain Glass</name><price>31.95</price><retail-price>87.60</retail-price><sale-price>29.95</sale-price><sku>5938</sku><upc></upc></product><product><ad-id>1234</ad-id><supplier-name>Window World</supplier-name><supplier-category>3703703</supplier-category><buy-url>http://website.com</buy-url><currency>USD</currency><description>Window</description><image-url>http://website.com/windowa/80x80.jpg</image-url><in-stock>yes</in-stock><manufacturer-name>Window World</manufacturer-name><name>Half Pain Glass</name><price>31.95</price><retail-price>87.60</retail-price><sale-price>29.95</sale-price><sku>5938</sku><upc></upc></product><product><ad-id>1234</ad-id><supplier-name>Window World</supplier-name><supplier-category>3703703</supplier-category><buy-url>http://website.com</buy-url><currency>USD</currency><description>Window</description><image-url>http://website.com/windowa/80x80.jpg</image-url><in-stock>yes</in-stock><manufacturer-name>Window World</manufacturer-name><name>Half Pain 
Glass</name><price>31.95</price><retail-price>87.60</retail-price><sale-price>29.95</sale-price><sku>5938</sku><upc></upc></product></products></api>

正则表达式代码:

// Regex-only XML -> JSON-like text conversion. Expects the XML source in `xml`
// (declared outside this snippet) and rewrites it in place, pass by pass.
//
// `regex` matches an opening tag whose final attribute (name="value") sits directly
// before the closing '>'. Group 1: the tag up to that attribute, group 2: the
// attribute name, group 3: the attribute value (non-empty, no double quotes).
var regex = /(<\w+[^<]*?)\s+([\w-]+)="([^"]+)">/;
            // The regex is not global, so each pass peels off one attribute: it is emitted as
            // <name>value</name> in front of the tag, and the tag is re-closed without it.
            // Repeat until no tag ends in an attribute.
            while(xml.match(regex)) xml = xml.replace(regex, '<$2>$3</$2>$1>'); // For attributes

            // Every pass below works on the output of the previous one; the order matters.
            xml = xml.replace(/\s/g, ' ').  // Replace every whitespace character (newlines, tabs, ...) with a plain space
                    replace(/< *\?[^>]*?\? *>/g, ''). // Remove <? ... ?> blocks such as the XML header
                    replace(/< *!--[^>]*?-- *>/g, ''). // Remove <!-- ... --> comments (only those containing no '>')
                    // Namespaced tag names, opening or closing: <ns:tag -> <ns_tag
                    replace(/< *(\/?) *(\w[\w-]+\b):(\w[\w-]+\b)/g, '<$1$2_$3').
                    // Self-closing tags: <tag ... /> -> < tag ...>
                    replace(/< *(\w[\w-]+\b)([^>]*?)\/ *>/g, '< $1$2>').
                    // Namespaced attributes: ns:attr="v" -> ns_attr="v"
                    replace(/(\w[\w-]+\b):(\w[\w-]+\b) *= *"([^>]*?)"/g, '$1_$2="$3"').
                    // Element that still carries attributes and has content: fold the content
                    // into a value="..." attribute and drop the closing tag.
                    replace(/< *(\w[\w-]+\b)((?: *\w[\w-]+ *= *" *[^"]*?")+ *)>( *[^< ]*?\b.*?)< *\/ *\1 *>/g, '< $1$2 value="$3">').
                    //replace(/ *(\w[\w-]+\b) *= *"([^>]*?)" */g, '< $1>$2').
                    // A tag name followed directly by '<' (its own '>' is missing): insert the '>'
                    replace(/< *(\w[\w-]+\b) *</g, '<$1>< ').
                    // Collapse doubled closing brackets: '> >' -> '>'
                    replace(/> *>/g, '>').
                    //replace(/< *\/ *(\w[\w-]+\b) *> *< *\1 *>/g, '').  // breaks the output?
                    // Escape remaining double quotes before JSON-style quotes are introduced
                    replace(/"/g, '\\"').
                    // Leaf elements (content has no '<' or '>'): <tag>text</tag> -> "tag":"text",
                    replace(/< *(\w[\w-]+\b) *>([^<>]*?)< *\/ *\1 *>/g, '"$1":"$2",').
                    // Same pattern, second pass: elements whose content was reduced to key/value
                    // pairs by the previous pass become "tag":{...},
                    replace(/< *(\w[\w-]+\b) *>([^<>]*?)< *\/ *\1 *>/g, '"$1":{$2},').
                    // NOTE(review): the next eight steps look like an attempt to turn repeated
                    // elements into arrays ("tag":[{ ... }],) by rewriting tags around '},{'
                    // boundaries, with the split/reverse/join pairs flipping the segment order and
                    // back. The sample output further down the page still shows duplicate keys
                    // rather than an array, so confirm behavior before relying on it.
                    replace(/< *(\w[\w-]+\b) *>(?=.*?< \/\1\},\{)/g, '"$1":[{').
                    split(/\},\{/).
                    reverse().
                    join('},{').
                    replace(/< *\/ *(\w[\w-]+\b) *>(?=.*?"\1":\[\{)/g, '}],').
                    split(/\},\{/).
                    reverse().
                    join('},{').
                    // '< /tag},{tag>' sequences -> '},{'
                    replace(/< \/(\w[\w-]+\b)\},\{\1>/g, '},{').
                    // Any remaining opening tag -> "tag":{
                    replace(/< *(\w[\w-]+\b)[^>]*?>/g, '"$1":{').
                    // Any remaining closing tag -> },
                    replace(/< *\/ *\w[\w-]+ *>/g,'},').
                    // Drop commas that directly precede a closing '}' or ']' (after '}', ']' and '"')
                    replace(/\} *,(?= *(\}|\]))/g, '}').
                    replace(/] *,(?= *(\}|\]))/g, ']').
                    replace(/" *,(?= *(\}|\]))/g, '"').
                    // Drop the trailing comma at the very end of the text
                    replace(/ *, *$/g, '');

输出:

"api": {
    "page-number": "1",
    "records-returned": "1",
    "total-matched": "1618",
    "products": {
        "product": {
            "ad-id": "1234",
            "supplier-name": "Window World",
            "supplier-category": "3703703",
            "buy-url": "http://website.com",
            "currency": "USD",
            "description": "Window",
            "image-url": "http://website.com/windowa/80x80.jpg",
            "in-stock": "yes",
            "manufacturer-name": "Window World",
            "name": "Half Pain Glass",
            "price": "31.95",
            "retail-price": "87.60",
            "sale-price": "29.95",
            "sku": "5938",
            "upc": ""
        },
        "product": {
            "ad-id": "1234",
            "supplier-name": "Window World",
            "supplier-category": "3703703",
            "buy-url": "http://website.com",
            "currency": "USD",
            "description": "Window",
            "image-url": "http://website.com/windowa/80x80.jpg",
            "in-stock": "yes",
            "manufacturer-name": "Window World",
            "name": "Half Pain Glass",
            "price": "31.95",
            "retail-price": "87.60",
            "sale-price": "29.95",
            "sku": "5938",
            "upc": ""
        },
        "product": {
            "ad-id": "1234",
            "supplier-name": "Window World",
            "supplier-category": "3703703",
            "buy-url": "http://website.com",
            "currency": "USD",
            "description": "Window",
            "image-url": "http://website.com/windowa/80x80.jpg",
            "in-stock": "yes",
            "manufacturer-name": "Window World",
            "name": "Half Pain Glass",
            "price": "31.95",
            "retail-price": "87.60",
            "sale-price": "29.95",
            "sku": "5938",
            "upc": ""
        },
        "product": {
            "ad-id": "1234",
            "supplier-name": "Window World",
            "supplier-category": "3703703",
            "buy-url": "http://website.com",
            "currency": "USD",
            "description": "Window",
            "image-url": "http://website.com/windowa/80x80.jpg",
            "in-stock": "yes",
            "manufacturer-name": "Window World",
            "name": "Half Pain Glass",
            "price": "31.95",
            "retail-price": "87.60",
            "sale-price": "29.95",
            "sku": "5938",
            "upc": ""
        }
    }
}

据我所知,剩下的最后一个问题是:重复的元素不会被转换成 JSON 数组。关于如何解决这个问题,有什么想法吗?

4

2 回答 2

2

使用正则表达式是一种有趣的方法,而且似乎比遍历节点列表要快一些。但是,当速度不是决定因素时(例如在 OP 的应用场景中),这并不是将 XML 转换为 JS 对象的最佳方法。上面的正则表达式代码压缩后大约有 1kb;用同样多的字节,您可以构建一个相当健壮且可重用的转换器……它甚至可以处理不同浏览器中的 XML 命名空间。

我编写了以下代码(压缩),它很好地处理了 OP 的 XML 数据。

    /**
     * Recursively converts a DOM node into plain JavaScript values.
     *
     * - Document node: converts its child nodes (or, when `options.find` is set, the
     *   elements returned by getElementsByTagNameNS / getElementsByTagName) and returns
     *   the single result, or an array when there is not exactly one result.
     * - Element node: returns the text content as a string when the element has no
     *   attributes and exactly one child with a truthy nodeValue; otherwise returns an
     *   object keyed by the parsed attribute and child names. Repeated child names are
     *   collected into an array.
     * - CDATA node: returns the value wrapped in "<![CDATA[" ... "]]>".
     * - Text node: returns nodeValue. Comment node: returns "". Anything else: null.
     *
     * @param {Object} node - DOM node (anything exposing nodeType, and for elements
     *   `attributes` / `childNodes` with `length` and `item(i)`).
     * @param {Object|string} [options] - Options object, or a string used as `find`.
     * @param {string} [options.find] - Tag name to look up when `node` is a document.
     * @param {string} [options.xmlns="*"] - Namespace passed to getElementsByTagNameNS.
     * @param {function(string): string} [options.parse] - Maps a node name to an output
     *   key. Defaults to stripping the namespace prefix, a leading "ows_" and every
     *   character outside the original character class.
     * @returns {*} The converted value as described above.
     */
    var xml2js = function (node, options) {
        var ELEMENT_NODE = 1;
        var TEXT_NODE = 3;
        var CDATA_SECTION_NODE = 4;
        var COMMENT_NODE = 8;
        var DOCUMENT_NODE = 9;
        var hasOwn = Object.prototype.hasOwnProperty;

        // Default key normaliser (unchanged from the original implementation).
        function defaultParse(name) {
            return name.split(":").pop().replace(/^ows_/, "").replace(/[^a-z,A-Z,0-9]/g, "");
        }

        if (!options) {
            options = {};
        }
        if (typeof options === "string") {
            options = { find: options };
        }
        options.xmlns = options.xmlns || "*";
        // The original compared the value itself to the string "function", which always
        // replaced a caller-supplied callback; check the type instead.
        if (typeof options.parse !== "function") {
            options.parse = defaultParse;
        }

        var i;
        var key;
        var child;
        var converted;

        switch (node.nodeType) {
        case DOCUMENT_NODE: {
            var candidates;
            if (!options.find) {
                candidates = node.childNodes;
            } else if (node.getElementsByTagNameNS) {
                candidates = node.getElementsByTagNameNS(options.xmlns, options.find.split(":").pop());
            } else {
                candidates = node.getElementsByTagName(options.find);
            }

            var results = [];
            for (i = 0; i < candidates.length; i++) {
                // Pass the options down so a custom `parse` also applies to nested nodes.
                converted = xml2js(candidates[i], options);
                if (converted) {
                    results.push(converted);
                }
            }
            return results.length === 1 ? results[0] : results;
        }

        case ELEMENT_NODE: {
            var attributes = node.attributes;
            var children = node.childNodes;

            // Text-only element: return the text itself. Returning here (instead of
            // falling through to the loops) avoids assigning properties onto a string,
            // which is a silent no-op in sloppy mode but throws in strict mode.
            if (attributes.length === 0 && children.length === 1 && children.item(0).nodeValue) {
                return children.item(0).nodeValue;
            }

            var result = {};
            for (i = 0; i < attributes.length; i++) {
                key = options.parse(attributes.item(i).nodeName);
                result[key] = attributes.item(i).nodeValue;
            }

            for (i = 0; i < children.length; i++) {
                child = children.item(i);
                if (child.nodeType === TEXT_NODE) {
                    continue;
                }
                key = options.parse(child.nodeName);
                converted = xml2js(child, options);
                if (!hasOwn.call(result, key)) {
                    // Own-property check so names like "constructor" are not mistaken
                    // for already-seen keys.
                    result[key] = converted;
                } else {
                    // Array.isArray instead of probing `.push`: the earlier value may be
                    // null (unsupported node type) or an object with its own "push" key.
                    if (!Array.isArray(result[key])) {
                        result[key] = [result[key]];
                    }
                    result[key].push(converted);
                }
            }
            return result;
        }

        case CDATA_SECTION_NODE:
            return "<![CDATA[" + node.nodeValue + "]]>";

        case TEXT_NODE:
            return node.nodeValue;

        case COMMENT_NODE:
            return "";

        default:
            return null;
        }
    };

然后加载并转换xml:

    /**
     * Demo driver: fetches CloudCode.xml, converts the 'products' nodes with xml2js
     * and logs the result as tab-indented JSON.
     */
    function test() {
        var request = new XMLHttpRequest();
        // Third argument `false` makes the request synchronous, so responseXML is
        // populated as soon as send() returns.
        request.open('GET', 'CloudCode.xml', false);
        request.send();

        // The second argument is optional - any node name may be given.
        var converted = xml2js(request.responseXML, 'products');
        var pretty = JSON.stringify(converted, null, '\t');
        console.log(pretty);
    }

输出:

    {
    "pagenumber": "1",
    "recordsreturned": "1",
    "totalmatched": "1618",
     "product": [
      {
        "adid": "1234",
        "suppliername": "Window World",
        "suppliercategory": "3703703",
         "buyurl": "http://website.com",


         etc...
于 2014-03-30T02:23:39.887 回答
1

好的,请注意,这只是一个临时的快速修复,但它似乎有效。它只是加上一层数组结构,这样同一个键就不会重复出现多次(但不会去掉该键本身)。
将下面这段代码:

replace(/< *(\w[\w-]+\b) *>(?=.*?< \/\1\},\{)/g, '"$1":[{').
split(/\},\{/).
reverse().
join('},{').
replace(/< *\/ *(\w[\w-]+\b) *>(?=.*?"\1":\[\{)/g, '}],').
split(/\},\{/).
reverse().
join('},{').

(这段代码是原先实现数组的一次尝试。)
替换为:

replace(/< *(\w[\w-]+\b) *>(?=("\w[\w-]+\b)":\{.*?\},\2)(.*?)< *\/ *\1 *>/, '"$1":[$3],')

请注意,我基本上沿用了原来的匹配方式。至少对于您的示例数据,这似乎是有效的。

于 2013-03-28T10:57:16.933 回答