0

在调整应用程序时,我发现这个例程去除 CDATA 标记的 XML 字符串并用字符引用替换某些字符,以便可以在 HTML 页面中显示这些字符。

例行公事不够完美;如果 XML 有问题,它将留下尾随空格并会因 StringOutOfBounds 异常而中断。

当我开始处理路由时,我创建了一些单元测试,但目前的功能可以改进,所以这些可以提供更多参考。

出于理智的原因,例程需要重构。但是,我需要修复这个程序的真正原因是为了提高性能。它已成为应用程序中严重的性能瓶颈。

package engine;

import junit.framework.Assert;
import junit.framework.TestCase;

public class StringFunctionsTest extends TestCase {

    public void testEscapeXMLSimple(){
        final String simple = "<xml><SvcRsData>a<![CDATA[<sender>John & Smith</sender>]]></SvcRsData></xml> ";      
        final String expected = "<xml><SvcRsData>a&#60;sender&#62;John &#38; Smith&#60;/sender&#62;</SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(simple);
        Assert.assertTrue(result.equals(expected));
    }

    public void testEscapeXMLCDATAInsideCDATA(){
        final String stringWithCDATAInsideCDATA = "<xml><SvcRsData>a<![CDATA[<sender>John <![CDATA[Inner & CD ]]>& Smith</sender>]]></SvcRsData></xml> ";       
        final String expected = "<xml><SvcRsData>a&#60;sender&#62;John &#60;![CDATA[Inner &#38; CD & Smith</sender>]]></SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(stringWithCDATAInsideCDATA);      
        Assert.assertTrue(result.equals(expected));
    }

    public void testEscapeXMLCDATAWithoutClosingTag(){      
        final String stringWithCDATAWithoutClosingTag = "<xml><SvcRsData>a<![CDATA[<sender>John & Smith</sender></SvcRsData></xml> ";
        try{
            String result = StringFunctions.escapeXML(stringWithCDATAWithoutClosingTag);
        }catch(StringIndexOutOfBoundsException exception){
            Assert.assertNotNull(exception);
        }   
    }

    public void testEscapeXMLCDATAWithTwoCDATAClosingTags(){        
        final String stringWithCDATAWithTwoClosingTags = "<xml><SvcRsData>a<![CDATA[<sender>John Inner & CD ]]>& Smith</sender>]]>bcd & efg</SvcRsData></xml> ";        
        final String expectedAfterSecondClosingTagNotEscaped = "<xml><SvcRsData>a&#60;sender&#62;John Inner &#38; CD & Smith</sender>]]>bcd & efg</SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(stringWithCDATAWithTwoClosingTags);
        Assert.assertTrue(result.equals(expectedAfterSecondClosingTagNotEscaped));
    }

    public void testEscapeXMLSimpleTwoCDATA(){
        final String stringWithTwoCDATA = "<xml><SvcRsData>a<![CDATA[<sender>John & Smith</sender>]]>abc<sometag>xyz</sometag><sometag2><![CDATA[<recipient>Gorge & Doe</recipient>]]></sometag2></SvcRsData></xml> ";      
        final String expected = "<xml><SvcRsData>a&#60;sender&#62;John &#38; Smith&#60;/sender&#62;abc<sometag>xyz</sometag><sometag2>&#60;recipient&#62;Gorge &#38; Doe&#60;/recipient&#62;</sometag2></SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(stringWithTwoCDATA);
        Assert.assertTrue(result.equals(expected));
    }

    public void testEscapeXMLOverlappingCDATA(){
        final String stringWithTwoCDATA = "<xml><SvcRsData>a<![CDATA[<sender>John & <![CDATA[Smith</sender>]]>abc<sometag>xyz</sometag><sometag2><recipient>Gorge & Doe</recipient>]]></sometag2></SvcRsData></xml> ";      
        final String expectedMess = "<xml><SvcRsData>a&#60;sender&#62;John &#38; &#60;![CDATA[Smith&#60;/sender&#62;abc<sometag>xyz</sometag><sometag2><recipient>Gorge & Doe</recipient>]]></sometag2></SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(stringWithTwoCDATA);
        Assert.assertTrue(result.equals(expectedMess));
    }

}

这是功能:

package engine;

public class StringFunctions {

    public static String escapeXML(String s) {
        StringBuffer result = new StringBuffer();
        int stringSize = 0;
        int posIniData = 0, posFinData = 0, posIniCData = 0, posFinCData = 0;
        String stringPreData = "", stringRsData = "", stringPosData = "", stringCData = "", stringPreCData = "", stringTempRsData = "";
        String stringNewRsData = "", stringPosCData = "", stringNewCData = "";
        short caracter;

        stringSize = s.length();
        posIniData = s.indexOf("<SvcRsData>");
        if (posIniData > 0) {
            posIniData = posIniData + 11;
            posFinData = s.indexOf("</SvcRsData>");
            stringPreData = s.substring(0, posIniData);
            stringRsData = s.substring(posIniData, posFinData);
            stringPosData = s.substring(posFinData, stringSize);
            stringTempRsData = stringRsData;
            posIniCData = stringRsData.indexOf("<![CDATA[");
            if (posIniCData > 0) {
                while (posIniCData > 0) {
                    posIniCData = posIniCData + 9;
                    posFinCData = stringTempRsData.indexOf("]]>");
                    stringPreCData = stringTempRsData.substring(0,
                            posIniCData - 9);
                    stringCData = stringTempRsData.substring(posIniCData,
                            posFinCData);
                    stringPosCData = stringTempRsData.substring(
                            posFinCData + 3, stringTempRsData.length());

                    stringNewCData = replaceCharacter(stringCData);
                    stringTempRsData = stringTempRsData.substring(
                            posFinCData + 3, stringTempRsData.length());
                    stringNewRsData = stringNewRsData + stringPreCData
                            + stringNewCData;
                    posIniCData = stringTempRsData.indexOf("<![CDATA[");
                }
            } else {
                stringNewRsData = stringRsData;
            }
            stringNewRsData = stringNewRsData + stringPosCData;
            s = stringPreData + stringNewRsData + stringPosData;
            stringSize = s.length();
        }

        for (int i = 0; i < stringSize; i++) {
            caracter = (short) s.charAt(i);
            if (caracter > 128) {
                result.append("&#");
                result.append(caracter);
                result.append(';');
            } else {
                result.append((char) caracter);
            }
        }
        return result.toString();

    }

    private static String replaceCharacter(String s) {
        StringBuffer result = new StringBuffer();
        int stringSize = s.length();
        short caracter;

        for (int i = 0; i < stringSize; i++) {

            caracter = (short) s.charAt(i);
            if (caracter > 128 || caracter == 34 || caracter == 38
                    || caracter == 60 || caracter == 62) {
                result.append("&#");
                result.append(caracter);
                result.append(';');
            } else {
                result.append((char) caracter);
            }
        }
        return result.toString();

    }

}
4

3 回答 3

2

在我看来,您正在做一些以前已经做过的事情,可能是在 apache commons 中。

你的函数太复杂了,我不确定你是否真的在“转义XML”或其他什么东西。如果您所做的只是转义 xml,那么您应该使用 google 以获得更好的实现。

于 2009-04-21T17:22:22.963 回答
2

看看来自Apache Commons的StringEscapeUtils 类。它包含一个方法。escapeXML

于 2009-04-21T17:25:47.777 回答
1

看看来自 Apache Commons的StringEscapeUtils 。这具有可靠地转义/取消转义 XML/HTML 的功能。

于 2009-04-21T17:25:34.453 回答