1

i use URLConnection to read a weburl, however sometimes it can not read full response (and sometimes it worked fine for the same url),

one url is:http://messages.yahoo.com/yahoo/Business_%26_Finance/Investments/Stocks_%28A_to_Z%29/Stocks_Y/index.html

below is one sample cut-off response

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <title>Yahoo! Message Boards - Stocks Y</title>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">


    <link rel="stylesheet" type="text/css" href="http://l.yimg.com/d/lib/mb/default_yui_ycs_20080219.css"/>


    <!--CSS source files for the Calendar Control -->
<style>
#cal1 { width: 100%;}
#cal2 { width: 100%;}
</style>

    </head>
    <body>
    <center>
        <div id="doc-layout">
        <link type="text/css" rel="stylesheet" href="http://l.yimg.com/a/lib/uh/15/css/uh_rsa-1.0.5.css" /><style type="text/css">#ygma div{clear:none;}#ygma #yahoo{padding-top:0;*padding-top:5px;}</style><script type="text/javascript">(function(){var h={};var setUp=function(){h = YAHOO.one.uh.hotlistInfo = {rd:"http://global.ard.yahoo.com",space:"/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=0",adid:"5857129",prop:"ymb",protocol:"http",host:"messages.yahoo.com",url:"http%3a%2f%2fmessages.yahoo.com%2fyahoo%2fBusiness_%2526_Finance%2fInvestments%2fStocks_%2528A_to_Z%2529%2fStocks_Y%2findex.html",spaceid:"389132707"};YAHOO.one.uh.translate=function(str){var set={yahoo_homepage:"http://www.yahoo.com/", hp_detect_script:"http://www.yahoo.com/includes/hdhpdetect.php", set_hp_script:"http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=1/SIG=10uacnjgh/*http://www.yahoo.com/bin/set", set_hp_firefox_instructions:["Drag the \"Y!\" and drop it onto the \"Home\" icon.", "Select \"Yes\" from the pop up window.", "Nothing, you're done."], close_this_window:"Close this window", set_hp_alternative_instructions1:"If this didn't work for you see ", detailed_set_hp_instructions:"detailed instructions", set_hp_alternative_instructions2:""};return set[str];};YAHOO.one.uh.Search=['ygmasearchInput', 'sat'];};if("undefined" !==typeof YAHOO && "undefined" !==typeof YAHOO.one && "undefined" !==typeof YAHOO.one.uh){setUp();}else{setTimeout(arguments.callee, 500);}})();</script><div id="ygma"><div id="ygmaheader"><div class="bd sp"><div id="ymenu" class="ygmaclr"><div id="mepanel"><ul id="mepanel-nav"><li class="me1"><em>New User? <a class="ygmasignup" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=2/SIG=1395kefdv/*https://edit.yahoo.com/config/eval_register?.done=http://messages.finance.yahoo.com&.src=quote&.intl=us">Register</a></em></li><li class="me2"><a href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=3/SIG=132l1q4g4/*https://login.yahoo.com/config/login?.done=http://messages.finance.yahoo.com&.src=quote&.intl=us"><em>Sign In</em></a></li><li class="me3"><a href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=4/SIG=11bv9h578/*http://help.yahoo.com/l/us/yahoo/finance/" target="_top">Help</a></li></ul></div><div id="ygmapromo"><div id="ygmapromo-i"></div>
<style> 
#ygma-s{display:none;}
#ygma #ygmapromo-i, #ygmabt #ygmapromo-i {display:inline;}
</style> 
<script> 
(function(){
window.YAHOO = window.YAHOO || {};
YAHOO.one = window.YAHOO.one || {};
YAHOO.one.uh = window.YAHOO.one.uh || {};
YAHOO.one.uh.popularSearches = function(data) {
    var chop = function(s, n) {
        if (s.length > n) {
        return s.substring(0,n)+"...";
        }
        else return s;
    }
    var e = document.getElementById("ygmapromo") || document.getElementById("ygmabtpromo");
    var e2 = document.getElementById("ygmapromo-i");
if (!data.query.results || data.query.results == "0" || !data.query.results.links) {
        return;
    }
    var i = data.query.results.links; 
    var hd = i.text;
    var fr = "&fr=ush-tts&fr2=ps";
    e2.innerHTML = '<a href="http://global.ard.yahoo.com/SIG=15p310019/M=650008.13959238.14033549.12832737/D=ymb/S=389132707:HPRM2/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=OFPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=6076890/R=0/*'+i.href+fr+'" id="ygma-s-h">Trending: '+chop(hd, 20)+'</a>';
};
var ie;
if (navigator.userAgent.match(/MSIE\s6/)) {
    var D = new Date();
    var yr = D.getFullYear();
    var mt = D.getMonth()+1;
    var dy = D.getDate();
    var hr = D.getHours();
    ie = '&cache=' + yr + mt + dy + hr;
}
else {
    ie = "";
}
var y = document.getElementById("ygma") || document.getElementById("ygmabt");
var feed;
if (y.offsetWidth < 850){
    feed = "http://query.yahooapis.com/v1/public/yql/uhTrending/cokeTrending3?format=json&callback=YAHOO.one.uh.popularSearches&_maxage=1800&diagnostics=false&limit=1";
}
else {
    feed = "http://query.yahooapis.com/v1/public/yql/uhTrending/cokeTrending2?format=json&callback=YAHOO.one.uh.popularSearches&_maxage=1800&diagnostics=false&limit=1";
}
var h = document.getElementsByTagName("head").item(0);
var s = document.createElement("script");
s.setAttribute("type", "text/javascript");
s.setAttribute("charset", "utf-8");
s.setAttribute("src", feed + ie);
h.appendChild(s);
})();
</script>     
<style> 
#ygma ol.searches h4, #ygmabt ol.searches h4 {
    font-size:100%;
    font-weight:bold;
    color:#333333;
    text-align:left;
    padding-bottom:3px;
    margin:0;
}
#ygma ol.searches, #ygmabt ol.searches {
    position:absolute;
    z-index:10002;
    width:155px;
    _width:165px;
    padding:7px 7px 3px 7px;
    background-color:#ffffff;
    border:1px solid #cacaca;
    margin:0;
}
#ygma ol.searches li, #ygmabt ol.searches li {
    text-align:left;
    list-style-type:none;
    color:#163780;
    margin:0;
    padding: 0 0 2px 0;
}
#ygma #ygmapromo ol.searches a, #ygmabt #ygmabtpromo ol.searches a {
    font-weight:normal;
    color:#163780;
}
#ygma #ygmapromo ol.searches a:hover, #ygmabt #ygmabtpromo ol.searches a:hover {
    width:100%;
}
#ygma #ygmapromo-i, #ygmabt #ygmapromo-i {
    position:relative;
    z-index:10002;
    zoom:1;
}
</style><script language=javascript>
if(window.yzq_d==null)window.yzq_d=new Object();
window.yzq_d['OFPLD0oGYzg-']='&U=13h2rtkt7%2fN%3dOFPLD0oGYzg-%2fC%3d650008.13959238.14033549.12832737%2fD%3dHPRM2%2fB%3d6076890%2fV%3d1';
</script><noscript><img width=1 height=1 alt="" src="http://us.bc.yahoo.com/b?P=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc&T=17t8jqecg%2fX%3d1342682750%2fE%3d389132707%2fR%3dymb%2fK%3d5%2fV%3d2.1%2fW%3dH%2fY%3dYAHOO%2fF%3d2676527448%2fH%3dc2VydmVJZD0iNHkwbC5XS0pMR0dYWWFLSTZnOXpXUTVod1BveEFWQUh0bjRBQnRYYyIgc2l0ZUlkPSI0NDY1NTUxIiB0U3RtcD0iMTM0MjY4Mjc1MDQ2NTkyNSIg%2fQ%3d-1%2fS%3d1%2fJ%3dFA208962&U=13h2rtkt7%2fN%3dOFPLD0oGYzg-%2fC%3d650008.13959238.14033549.12832737%2fD%3dHPRM2%2fB%3d6076890%2fV%3d1"></noscript></div><div id="pa"><div id="pa-wrapper"><ul id="pa2-nav" class="sp"><li class="pa1 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=5/SIG=10np9vmbm/*http://www.yahoo.com/" target="_top">Yahoo!</a></li><li class="pa2 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=6/SIG=1107gluf6/*http://mail.yahoo.com?.intl=us" target="_top">Mail</a></li></ul><div id="pa-left" class="sp"></div><ul id="pa-nav" class="sp"><li class="pa3 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=7/SIG=10l2nj3k8/*http://my.yahoo.com" title="My Yahoo!" target="_top">My Yahoo!</a></li><li class="pa4 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=8/SIG=10niob72s/*http://news.yahoo.com" title="Yahoo! News" target="_top">News</a></li><li class="pa5 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=9/SIG=10q40gpus/*http://finance.yahoo.com" title="Yahoo! Finance" target="_top">Finance</a></li><li class="pa6 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=10/SIG=10pcalhda/*http://sports.yahoo.com" title="Yahoo! Sports" target="_top">Sports</a></li></ul><div id="pa-right" class="sp"></div></div></div></div><div id="yahoo" class="ygmaclr"><div id="ygmabot"> <a id="ygmalogo" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=11/SIG=10q40gpus/*http://finance.yahoo.com" target="_top"><img id="ygmalogoimg" width="246" h...

below is my code to read a web url,

public String getHtml(String urlstr)
{
    try {
        URL url = new URL(urlstr);
        URLConnection conn = url.openConnection();

        conn.addRequestProperty("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)");


        System.out.println("now to set cookie: " + cookie);

        conn.addRequestProperty("Cookie", cookie);






        BufferedReader in = new BufferedReader(new InputStreamReader(
                                    conn.getInputStream()));
        String inputLine;

        extractCookie(conn);



        //PrintWriter file = new PrintWriter(new BufferedWriter(new FileWriter("out.txt")));


        StringBuilder sb = new StringBuilder();

        while ((inputLine = in.readLine()) != null)
        {
            //file.println(inputLine);
            sb.append(inputLine).append("\n");
        }

        in.close();


        //file.close();



        return sb.toString();

    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    return "";
}

private void extractCookie(URLConnection conn) {
    System.out.println("now try to get cookie:");
    Map<String, List<String>> fields = conn.getHeaderFields();
    for(String key: fields.keySet())
    {
        List<String> values = fields.get(key);


        if("Set-Cookie".equalsIgnoreCase(key))
        {
            System.out.println(values.size());

            for(String v: values)
            {
                String c = Util.getCookie(v);

                this.cookieSet.add(c);  

                System.out.println(c);
            }

            //this.cookieMap.p

            //cookieSet.add(values);
        }

    }
}
4

1 回答 1

1

好吧,我认为这段代码工作正常,在调试模式下,eclipse 变量没有显示长字符串的完整值,这让我感到困惑......

(我从调试模式的变量值复制了输出......)

后来我打印到控制台/文件,这对于我迄今为止测试过的案例来说似乎很好......

于 2012-07-19T08:37:22.490 回答