1

我只是将一个 json 到 xml 转换器一起破解,它或多或少地解析 json,并在正则表达式回调函数中将其直接操作为 xml。我已经完全发明了这种方法,所以我不确定是否有任何明显/可怕的缺点或限制 - 迄今为止,解析对我来说几乎是一个谜。

我在那里发表了很多评论,所以希望它很容易理解。

编码

var cb = function(m,oo,co,st,s,ignore,cl,cm,oa,ca){

  // remove slashes from captured strings
  s = stripslashes(s)

  // this will be built, and returned at the end
  var xml = ""

  // function to add padding to the output, at the start of the lines
  var pad = function(q){ q--; q *= 4
    var out = ''
    for(;q--;) out += ' '
    return out
  }

  // track the arrat status, last item type, current indent level, and tags stack
  this.array = this.array || null
  this.last = this.last || null
  this.level = this.level || 0
  this.tags = this.tags || []

  // Handle opening braces
  if(oo){

    if(this.array && this.last.match(/^(cm|oa)$/)){
      xml += "\n"+pad(this.level)+"<item>"
      this.tags.push("item")
    }

    // increase the indentation level
    this.level++

    // set the last item, which should be the last thing to do in the block
    this.last = 'oo'

  // Handle closing braces
  } else if(co){

    // if the last item was not a string, then add padding
    if(this.last != 'st')
      xml += pad(this.level)

    // close a tag
    xml += "</"+this.tags.pop()+">\n"

    // decrease the indentation level
    this.level--

    // set the last item, which should be the last thing to do in the block
    this.last = 'co'

  // Handle Strings
  } else if(st){

    // Debug by showing the tags as they are pushed and popped from the stack
    // console.log(this.tags)

    // Handle Strings where last item was an Object, or a comma
    if(this.last == "oo" || this.last == "cm"){

      // add the tag to the stack, and xml output
      this.tags.push(s)
      xml += "\n"+pad(this.level)+"<"+s+">"

    // Handle Strings where last item was an Colon
    } else if(this.last == "cl"){
      xml += s
    }

    // set the last item, which should be the last thing to do in the block
    this.last = 'st'

  // Handle the opan arrays
  } else if(oa){
    this.array = 1
    this.level++
    xml += "\n"+pad(this.level)+"<array>"
    this.level++
    this.last = 'oa'

  // Handle the close arrays
  } else if(ca){
    this.array = 0
    xml += pad(this.level)+"</"+this.tags.pop()+">\n"
    this.level--
    xml += pad(this.level)+"</array>\n"
    this.level--
    this.last = 'ca'

  // Handle the colons
  } else if(cl){
    this.last = 'cl'

  // Handle the commas
  } else if(cm){
    if(this.last == "co")
      xml += pad(this.level)+"</"+this.tags.pop()+">"
    else
      xml += "</"+this.tags.pop()+">"
    this.last = 'cm'
  }

  // return the built xml
  return xml

}


// DEPENDENCY
function stripslashes (str) {

  return (str + '').replace(/\\(.?)/g, function (s, n1) {
    switch (n1) {
    case '\\':
      return '\\';
    case '0':
      return '\u0000';
    case '':
      return '';
    default:
      return n1;
    }
  });
}

// DATA FOR TESTING
var o = {
  "hash":"b6f6991d03df0e2e04dafffcd6bc418aac66049e2cd74b80f14ac86db1e3f0da",
  "ver":1,
  "vin_sz":1,
  "vout_sz":2,
  "lock_time":"Unavailable",
  "size":258,
  "relayed_by":"64.179.201.80",
  "block_height": 12200,
  "tx_index":"12563028",
  "inputs":[
    { "prev_out":{
        "hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
        "value":"100000000",
        "tx_index":"12554260",
        "n":"2"
      },
      "scriptSig":"Unavailable"
    },
    { "prev_out":{
        "hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
        "value":"100000000",
        "tx_index":"12554260",
        "n":"2"
      },
      "scriptSig":"Unavailable"
    },
    { "prev_out":{
        "hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
        "value":"100000000",
        "tx_index":"12554260",
        "n":"2"
      },
      "scriptSig":"Unavailable"
    },
    { "prev_out":{
        "hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
        "va'l\"u'e":"1000\"00000",
        "tx_index":"12554260",
        "n":"2"
      },
      "scriptSig":"Unavailable"
    }
  ],
  "out":[
    { "value":"98000000",
      "hash":"29d6a3540acfa0a950bef2bfdc75cd51c24390fd",
      "scriptPubKey":"Unavailable"
    },
    { "value":"2000000",
      "hash":"17b5038a413f5c5ee288caa64cfab35a0c01914e",
      "scriptPubKey":"Unavailable"
    }
  ]
}

var s = JSON.stringify(o)

console.log("Original: "+s+"\n")

// var r = /({)|(})|([""''])(.+?)\3|(:)|(,)|(\[)|(\])/g

// THE REGEX
// capturing groups map to -> m,oo,co,st,s,ignore,cl,cm,oa,ca in the callback
// meaning match, open-object, close-object, string-type, string, ignore, colon, comma, open-array, close-array
// string escaping made possible by... `(['"])((\\\3|[^\3])+?)\3`
var r = /({)|(})|(['"])((\\\3|[^\3])+?)\3|(:)|(,)|(\[)|(\])/g

var xml = s.replace(r,cb)

console.log(xml)

问题

  1. 如何添加对嵌套数组的支持?
  2. 这种解析技术叫什么?
  3. 字符串转义方法是否可靠?
  4. 这是一种可行的解析方法吗?
  5. 使用这种方法有更通用的解决方案吗?
4

1 回答 1

0

当您正在寻找自己的解决方案时,我相信您的意思是了解解析器在这种情况下是如何工作的。

如何添加对嵌套数组的支持?

数组可以是值、对象和数组的集合。因此,编写一个分别处理值、数组和对象的方法。现在,当您遇到数组或数组内的对象时,调用相同的函数。

我不确定其他问题。但这是我在快速 xml 解析器中使用的解决方案

var j2x = function(jObj,level){
    var xmlStr = "", attrStr = "" , val = "";
    var keys = Object.keys(jObj);
    var len = keys.length;
    for(var i=0;i<len;i++){
        var key = keys[i];
        if(typeof jObj[key] !== "object"){//premitive type
            var attr = this.isAttribute(key);
            if(attr){
                attrStr += " " +attr+"=\""+ this.encodeHTMLchar(jObj[key], true) +"\"";
            }else if(this.isCDATA(key)){
                if(jObj[this.options.textNodeName]){
                    val += this.replaceCDATAstr(jObj[this.options.textNodeName], jObj[key]);
                }else{
                    val += this.replaceCDATAstr("", jObj[key]);
                }
            }else{//tag value
                if(key === this.options.textNodeName){
                    if(jObj[this.options.cdataTagName]){
                        //value will added while processing cdata
                    }else{
                        val += this.encodeHTMLchar(jObj[key]);    
                    }
                }else{
                    val += this.buildTextNode(jObj[key],key,"",level);
                }
            }
        }else if(Array.isArray(jObj[key])){//repeated nodes
            if(this.isCDATA(key)){
                if(jObj[this.options.textNodeName]){
                    val += this.replaceCDATAarr(jObj[this.options.textNodeName], jObj[key]);
                }else{
                    val += this.replaceCDATAarr("", jObj[key]);
                }
            }else{//nested nodes
                var arrLen = jObj[key].length;
                for(var j=0;j<arrLen;j++){
                    var item = jObj[key][j];
                    if(typeof item === "object"){
                        var result = this.j2x(item,level+1);
                        val  += this.buildObjNode(result.val,key,result.attrStr,level);
                    }else{
                        val += this.buildTextNode(item,key,"",level);
                    }
                }
            }
        }else{

            if(this.options.attrNodeName && key === this.options.attrNodeName){
                var Ks = Object.keys(jObj[key]);
                var L = Ks.length;
                for(var j=0;j<L;j++){
                    attrStr += " "+Ks[j]+"=\"" + this.encodeHTMLchar(jObj[key][Ks[j]]) + "\"";
                }
            }else{
                var result = this.j2x(jObj[key],level+1);
                val  += this.buildObjNode(result.val,key,result.attrStr,level);
            }
        }
    }
    return {attrStr : attrStr , val : val};
}
于 2018-02-17T03:46:09.357 回答