1

我有一个程序来获取一个大的 xml 文件并对其进行验证,然后将其拆分为较小的文件。我遇到的问题是新文件的编码是 UTF-8 。我需要它们在 ISO-8859-1

这里是代码

public class SplitMain {

public static void main(String [] args) throws Exception {
    validateInputFile("D:/sanket/cms_dev/XmlSplitSample/src/inputFile/");
    File input = new File("D:/sanket/cms_dev/XmlSplitSample/src/inputFile/sample.xml");
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    Document doc = dbf.newDocumentBuilder().parse(input);
    XPath xpath = XPathFactory.newInstance().newXPath();

    NodeList nodes = (NodeList) xpath.evaluate("//DataFile/Contact", doc, XPathConstants.NODESET);

    int itemsPerFile = 5;
    int fileNumber = 0;
    Document currentDoc = dbf.newDocumentBuilder().newDocument();
    Node rootNode = currentDoc.createElement("DataFile");
    File currentFile = new File("nufile"+fileNumber+".xml");
    for (int i=1; i <= nodes.getLength(); i++) {
        Node imported = currentDoc.importNode(nodes.item(i-1), true);
        rootNode.appendChild(imported);

        if (i % itemsPerFile == 0) {
            writeToFile(rootNode, currentFile);

            rootNode = currentDoc.createElement("DataFile");
            currentFile = new File("nufile"+(++fileNumber)+".xml");
            System.out.println(currentFile);
        }
    }

    writeToFile(rootNode, currentFile);
}

private static void writeToFile(Node node, File file) throws Exception {
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    transformer.transform(new DOMSource(node), new StreamResult(new FileWriter(file)));
}
private static void validateInputFile(String WORK_DIRECTORY)
{
    //String workingDir=config.getProperty(WORK_DIRECTORY);//comment for automating the process

    String workingDir=WORK_DIRECTORY;//added for automating the process

    String finalString = null;
    File folder = new File(workingDir);

    if (folder.isFile())
    {
        System.out.println("watever");
        return ;
    }

    String[] fileNameArray = folder.list();
    String xmlExtension=".xml";
    for (String fileName : fileNameArray) {



            try{
                //XMLtoString
                BufferedReader br = new BufferedReader(new FileReader(new File(workingDir + "/" +fileName))); 
                String line;
                StringBuilder stringBuilder = new StringBuilder();
                while((line=br.readLine())!= null)
                {
                    stringBuilder.append(line.trim()); 
                } 
                finalString = stringBuilder.toString();
                StringBuilder sb = new StringBuilder();

                if(finalString == null)
                    return;
               System.out.println(finalString);
                for(int i=0;i<finalString.length();i++)
                {
                   if (finalString.charAt(i) == '&')
                    {
                       sb.append("&amp;");

                    }
                    else
                    {
                        sb.append(finalString.charAt(i));
                    }
                }
                finalString=sb.toString();

                //StringToXML

                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();   
                System.out.println(finalString);
                DocumentBuilder builder=factory.newDocumentBuilder();
                Document document = builder.parse( new InputSource(new StringReader( finalString ) ) ); 
                TransformerFactory tranFactory = TransformerFactory.newInstance();   
                Transformer aTransformer = tranFactory.newTransformer(); 
                Source src = new DOMSource(document); 
                Result dest = new StreamResult( new File( workingDir + "/" +fileName) );
                aTransformer.transform( src, dest ); 

            }
                catch (Exception e) {
                e.printStackTrace();
                }



    }
}

}

4

1 回答 1

3

您需要指定 Transformer 编码,例如:

transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");

然后使用流写出(如果您使用写入器,它将通过另一个不需要的编码层)

于 2012-11-22T14:43:03.103 回答