这个问题是由字节顺序标记(BOM)引起的。下面是一个演示该问题及其修复方法的 JUnit 测试用例：
package rss;
import com.sun.syndication.io.*;
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BOMInputStream;
import org.junit.Test;
import org.xml.sax.InputSource;
/**
 * Demonstrates how a leading byte order mark (BOM) interferes with parsing an
 * RSS feed through ROME's {@link SyndFeedInput}, and how Apache Commons IO's
 * {@link BOMInputStream} can be used to strip it.
 *
 * <p>Every test downloads {@link #url} over the network and prints the feed
 * description to standard output, so the tests need connectivity and depend on
 * what the remote server currently returns.
 */
public class RssEncodingTest {
    String url = "http://www.moneydj.com/KMDJ/RssCenter.aspx?svc=NH&fno=1&arg=X0000000";

    /**
     * Parses the feed straight from the connection's byte stream.
     *
     * <p>This works because the {@link InputSource} is built directly on the
     * URL connection's {@link InputStream}: the bytes are never turned into a
     * {@code String} by this code before the parser sees them.
     *
     * @throws IOException if the feed cannot be downloaded
     * @throws FeedException if the feed cannot be parsed
     */
    @Test
    public void test01() throws IOException, FeedException {
        try (InputStream is = new URL(url).openConnection().getInputStream()) {
            InputSource source = new InputSource(is);
            System.out.println("description: "
                    + new SyndFeedInput().build(source).getDescription());
        }
    }

    /**
     * Parses the feed from a {@code String} that still contains the BOM.
     *
     * <p>This test is expected to fail: it exists to reproduce the byte order
     * mark problem, because the downloaded text is handed to the parser
     * unchanged.
     *
     * @throws IOException if the feed cannot be downloaded
     * @throws FeedException if the feed cannot be parsed
     */
    @Test
    public void test02() throws IOException, FeedException {
        String html = fetchFeedText();
        Reader reader = new StringReader(html);
        System.out.println("description: "
                + new SyndFeedInput().build(reader).getDescription());
    }

    /**
     * Parses the feed from a {@code String} after removing the BOM with
     * Apache Commons IO.
     *
     * <p>The text is re-encoded to bytes, passed through a
     * {@link BOMInputStream} (which, with its default settings, drops a
     * leading UTF-8 BOM), and decoded again before being given to the parser.
     *
     * @throws IOException if the feed cannot be downloaded or re-read
     * @throws FeedException if the feed cannot be parsed
     */
    @Test
    public void test03() throws IOException, FeedException {
        String html = fetchFeedText();
        // Encode and decode with the same explicit charset so the BOM bytes
        // survive the round trip regardless of the platform default.
        try (BOMInputStream bomIn = new BOMInputStream(
                IOUtils.toInputStream(html, StandardCharsets.UTF_8))) {
            String f = IOUtils.toString(bomIn, StandardCharsets.UTF_8);
            Reader reader = new StringReader(f);
            System.out.println("description: "
                    + new SyndFeedInput().build(reader).getDescription());
        }
    }

    /**
     * Downloads {@link #url} and returns the response body as text, closing
     * the connection's stream when done.
     *
     * <p>NOTE(review): the body is decoded as UTF-8 on the assumption that the
     * feed is UTF-8 encoded (the BOM stripping in {@link #test03()} only
     * handles a UTF-8 BOM) - confirm against the server's response.
     *
     * @return the response body, including any leading BOM character
     * @throws IOException if the connection cannot be opened or read
     */
    private String fetchFeedText() throws IOException {
        try (InputStream in = new URL(url).openConnection().getInputStream()) {
            return IOUtils.toString(in, StandardCharsets.UTF_8);
        }
    }
}