java - 穿越推特时间线

Question

使用 Twitter 的 REST API 时，服务器每次只返回有限数量的推文。我正尝试按照 API 文档建议的方法，循环使用 max_id 参数来检索用户的历史推文。但是，每次调用都会得到一个新的 XML 声明（XML 头），它会引发 SAX 解析器错误，因此我无法读取 max_id。我查看了 Java API 中的 Transformer 和 OMIT_XML_DECLARATION，但不确定应该把它放在代码的哪个位置——是应该在建立连接时、在打印方法中，还是在处理文档时去掉 XML 声明。尽管我已经深入研究过，仍然弄不清楚 Transformer 的输出。

public class DataGrabber {

File destFile;
int qcount = 0;

/**
 * Downloads the status history of {@code MyIds.rootUser} page by page into
 * {@code MyIds.hoopoeData + "/" + MyIds.rootUser + "Hx.xml"}.
 *
 * <p>After each request the history file is parsed, the number of
 * {@code status} nodes and the user's {@code statuses_count} are read back
 * through {@code HooUtil}, and the next request is built with a
 * {@code max_id} one below the id of the last status seen. The loop stops
 * once the number of downloaded statuses reaches {@code statuses_count}, or
 * when a request adds no further statuses.
 *
 * <p>NOTE(review): the history file must be a single well-formed XML
 * document for {@code builder.parse} to succeed. {@code print_content}
 * opens the file in append mode, so successive responses are presumably
 * concatenated (each with its own XML declaration) -- confirm and merge the
 * pages into one document before relying on this loop.
 *
 * @throws ParserConfigurationException if no XML {@code DocumentBuilder} can be created
 * @throws IOException if the history file cannot be read
 * @throws InterruptedException if the pause between requests is interrupted
 * @throws SAXException if the history file is not well-formed XML
 */
public void getRuserHx() throws ParserConfigurationException, IOException, InterruptedException, SAXException {
    // Loop invariants: the first-page URL and the history file location.
    final String baseUrl = "https://twitter.com/statuses/user_timeline.xml?screen_name=" + MyIds.rootUser + "&count=300";
    final String filex = MyIds.rootUser + "Hx.xml";
    final File filename = new File(MyIds.hoopoeData + "/" + filex);

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder = factory.newDocumentBuilder();

    int downNodes = 0;
    int statTot = 10;

    // Starts as the first-page URL; each iteration replaces it with the
    // max_id URL for the next page. (Previously the URL was rebuilt inside
    // the loop, so the max_id variant was never actually requested.)
    String https_url = baseUrl;

    //loop and download based on the id of the last tweet
    while (downNodes < statTot) {
        System.out.println("Getting user status history...");
        makeConnection(https_url, filex);

        //validate the xml before you parse
        Document doc = builder.parse(filename);
        doc.getDocumentElement().normalize();

        //set up for the loop
        final int previousCount = downNodes;
        downNodes = HooUtil.nodeCount(filename, "status");
        statTot = Integer.parseInt(HooUtil.nodeValue(filename, "user", "statuses_count", 0));

        // No new statuses arrived: either nothing was downloaded at all
        // (index downNodes - 1 would be -1) or the server has no older
        // tweets to give. Stop instead of looping forever.
        if (downNodes <= previousCount) {
            System.out.println("No further statuses returned; stopping early.");
            break;
        }

        // Ask only for tweets strictly older than the last one we already have.
        long loopMax = Long.parseLong(HooUtil.nodeValue(filename, "status", "id", downNodes - 1)) - 1;
        https_url = baseUrl + "&max_id=" + loopMax;

        // Pause between requests.
        Thread.sleep(4000);
        qcount++;
    }

    System.out.println("Finished downloading user status history.");
}

    //connect with the input query
/**
 * Opens a connection to {@code https_url} and passes it, together with
 * {@code filex}, to {@code print_content}.
 *
 * <p>A malformed URL or an I/O failure while opening the connection is not
 * propagated; its stack trace is printed and the method returns.
 *
 * @param https_url the URL to request
 * @param filex     the file name handed on to {@code print_content}
 */
public void makeConnection(String https_url, String filex) {
    try {
        HttpsURLConnection connection = (HttpsURLConnection) new URL(https_url).openConnection();
        //dump all the content into an xml file
        print_content(connection, filex);
    } catch (IOException e) {
        // MalformedURLException is a subclass of IOException, so this one
        // handler covers both failure modes.
        e.printStackTrace();
    }
}

    //the print method for the xml file
/**
 * Copies the response body of {@code con} line by line into the file
 * {@code MyIds.hoopoeData + "/" + filex}. Does nothing when {@code con} is
 * {@code null}. An {@code IOException} is not propagated; its stack trace
 * is printed.
 *
 * NOTE(review): the writer is opened in append mode, so every call adds
 * another complete response (including its XML declaration) to the same
 * file -- presumably the cause of the SAX parser error described in the
 * question; confirm before changing.
 * NOTE(review): reader and writer are closed only on the success path; if
 * reading or writing throws, they stay open.
 *
 * @param con   the open connection whose input stream is read
 * @param filex the name of the destination file below MyIds.hoopoeData
 */
private void print_content(HttpsURLConnection con, String filex){

    if(con!=null){

        try {           
           // Reader uses the platform default charset (no charset is specified).
           BufferedReader br = new BufferedReader(new InputStreamReader(con.getInputStream()));

              // The leading "/" makes the concatenation below yield <hoopoeData>/<filex>.
              destFile = new File("/" + filex);
              // Second FileWriter argument 'true' = append to the existing file.
              PrintWriter out = new PrintWriter(new FileWriter(MyIds.hoopoeData + destFile, true)); 
              String input;
              // Copy the response one line at a time until end of stream.
              while ((input = br.readLine()) != null){
              out.println(input);
        }

           out.flush();
           out.close();
           br.close();  

        } 
        catch (IOException e) {
           e.printStackTrace();
        }   

    }

java - 穿越推特时间线

0 回答 0

Related

Reference