这是一段代码,可以为您做到这一点。除了您尝试执行的操作之外,它还能够处理 GZip 压缩(如果您在标头中使用 设置它Accept-Encoding: gzip, deflate
)并自动为您检测编码(处理字符串所必需的)。
private InputStream prepareInputStream(String urlToRetrieve) throws IOException
{
URL url = new URL(urlToRetrieve);
URLConnection uc = url.openConnection();
if (timeOut > 0)
{
uc.setConnectTimeout(timeOut);
uc.setReadTimeout(timeOut);
}
InputStream is = uc.getInputStream();
// deflate, if necesarily
if ("gzip".equals(uc.getContentEncoding()))
is = new GZIPInputStream(is);
this.lastURLConnection = uc;
return is;
}
// detects encoding associated to the current URL connection, taking into account the default encoding
public String detectEncoding()
{
if (forceDefaultEncoding)
return defaultEncoding;
String detectedEncoding = detectEncodingFromContentTypeHTTPHeader(lastURLConnection.getContentType());
if (detectedEncoding == null)
return defaultEncoding;
return detectedEncoding;
}
public static String detectEncodingFromContentTypeHTTPHeader(String contentType)
{
if (contentType != null)
{
int chsIndex = contentType.indexOf("charset=");
if (chsIndex != -1)
{
String enc = StringTools.substringAfter(contentType , "charset=");
if(enc.indexOf(';') != -1)
enc = StringTools.substringBefore(enc , ";");
return enc.trim();
}
}
return null;
}
// retrieves into an String object
public String retrieve(String urlToRetrieve)
throws MalformedURLException , IOException
{
InputStream is = prepareInputStream(urlToRetrieve);
String encoding = detectEncoding();
BufferedReader in = new BufferedReader(new InputStreamReader(is , encoding));
StringBuilder output = new StringBuilder(BUFFER_LEN_STRING);
String str;
boolean first = true;
while ((str = in.readLine()) != null)
{
if (!first)
output.append("\n");
first = false;
output.append(str);
}
in.close();
return output.toString();
}
代码来自info.olteanu.utils.retrieve.RetrievePage
Phramer项目。