总结: 我的 Jsoup 解析器可以独立运行,但是一旦复制粘贴到我的 Android 应用程序的 AsyncTask 任务类之一中,它就无法收集任何值。返回的二维数组只填充了空值。
长版: 我一直在开发一个应用程序,它通过 Jsoup 使用页面抓取来从各种博客中提取和显示内容。到目前为止,我已经编写了一些解析器,并且似乎都按预期工作。不幸的是,我最近的解析器(为 nyc-shows.brooklynvegan.com 编写)一直存在问题。
下面是解析器方法本身,由一个添加了打印语句的 main 方法调用。你可以自己运行它,它能正常工作(不完美,但能用)。
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Stand-alone command-line harness for the BrooklynVegan NYC shows scraper.
 *
 * <p>Downloads {@link #FEED_URL}, extracts at most 50 events and prints the six
 * columns of every row (populated or not) to standard output.
 *
 * <p>Column layout of each row returned by {@link #getFeedItems()}:
 * <ol start="0">
 *   <li>event title</li>
 *   <li>second space-separated token of the date element's text</li>
 *   <li>fourth space-separated token of the date element's text</li>
 *   <li>price label: {@code "See Ticket"} or {@code "Free"}</li>
 *   <li>location text</li>
 *   <li>ticket URL</li>
 * </ol>
 */
public class Main {
    static final String TAG_EVENT = "li.ds-entry";
    static final String TAG_TITLE = ".ds-entry-title";
    static final String TAG_LOCATION = ".location";
    static final String TAG_DATE_AND_TIME = ".ds-date";
    static final String TAG_TICKET_URL = ".ds-buy-tickets";
    static final String FEED_URL = "http://nyc-shows.brooklynvegan.com/";

    /** Maximum number of events stored in the result table. */
    private static final int MAX_EVENTS = 50;

    /** Number of columns stored per event. */
    private static final int COLUMN_COUNT = 6;

    /** Regex used to split the date element's text into tokens (a single space). */
    private static final String DELIM_SPACE = "[ ]";

    /** Words whose presence in a title marks the entry as "fake" (see {@link #checkFake}). */
    private static final String[] DAY_WORDS = {
        "Today", "Tomorrow", "Monday", "Tuesday", "Wednesday",
        "Thursday", "Friday", "Saturday", "Sunday"
    };

    /**
     * Fetches the feed and prints every cell of the result table, one per line, with a
     * separator line after each row.
     *
     * @param args unused
     * @throws IOException declared because {@link #getFeedItems()} declares it
     */
    public static void main(String[] args) throws IOException {
        String[][] values = getFeedItems();
        for (String[] row : values) {
            for (String cell : row) {
                System.out.println(cell);
            }
            System.out.println("-----------------");
        }
    }

    /**
     * Downloads the feed page and extracts one row per event element.
     *
     * <p>Events whose title is flagged by {@link #checkFake(String)} are skipped. An event
     * without a title element is still recorded, with a {@code null} title. Rows are filled
     * consecutively from index 0; rows that are never filled contain only {@code null}s.
     * Events beyond the table capacity are ignored.
     *
     * <p>An {@link IOException} raised while fetching is caught here, its stack trace is
     * printed, and whatever was collected so far is returned.
     *
     * @return a 50x6 table; see the class documentation for the column layout
     * @throws IOException kept in the signature for existing callers
     */
    public static String[][] getFeedItems() throws IOException {
        String[][] values = new String[MAX_EVENTS][COLUMN_COUNT];
        try {
            // A timeout of 0 tells jsoup to wait indefinitely.
            Document doc = Jsoup.connect(FEED_URL).timeout(0).get();
            Elements events = doc.select(TAG_EVENT);
            int i = 0;
            for (Element event : events) {
                // Stop before writing past the end of the fixed-size table.
                if (i >= values.length) {
                    break;
                }

                // Title: dereference only after the null check.
                Element title = event.select(TAG_TITLE).first();
                if (title != null) {
                    String titleString = title.text();
                    if (checkFake(titleString)) {
                        continue;
                    }
                    values[i][0] = titleString;
                }

                // Date and time: take tokens 1 and 3 only when they exist.
                Element dateAndTime = event.select(TAG_DATE_AND_TIME).first();
                if (dateAndTime != null) {
                    String[] tokens = dateAndTime.text().split(DELIM_SPACE);
                    if (tokens.length > 1) {
                        values[i][1] = tokens[1];
                    }
                    if (tokens.length > 3) {
                        values[i][2] = tokens[3];
                    }
                }

                // Default price label; replaced with "Free" below when there is no ticket box.
                values[i][3] = "See Ticket";

                Element location = event.select(TAG_LOCATION).first();
                if (location != null) {
                    values[i][4] = location.text();
                }

                Element ticketContainer = event.select(TAG_TICKET_URL).first();
                if (ticketContainer != null) {
                    values[i][5] = ticketContainer.select("a").attr("href");
                } else {
                    values[i][3] = "Free";
                }
                i++;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return values;
    }

    /**
     * Reports whether a title contains "Today", "Tomorrow" or an English weekday name
     * (case-sensitive substring match). Such entries are presumably day headings rather
     * than real events.
     *
     * @param s the title text; must not be {@code null}
     * @return {@code true} if any of the day words occurs in {@code s}
     */
    public static boolean checkFake(String s) {
        for (String day : DAY_WORDS) {
            if (s.contains(day)) {
                return true;
            }
        }
        return false;
    }
}
现在,这是传输到 AsyncTask 中的完全相同的方法,以便在显示加载屏幕时由我的应用程序在后台运行。
package com.example.nylist;
import java.io.IOException;
import android.app.Activity;
import android.content.Context;
import android.os.AsyncTask;
import android.util.Log;
import android.widget.Toast;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Background task that scrapes the BrooklynVegan NYC shows page with jsoup and passes the
 * resulting rows to the hosting {@code EventList} activity.
 *
 * <p>Column layout of each row produced by {@link #getFeedItems()}:
 * <ol start="0">
 *   <li>event title</li>
 *   <li>second space-separated token of the date element's text</li>
 *   <li>fourth space-separated token of the date element's text</li>
 *   <li>price label: {@code "See Ticket"} or {@code "Free"}</li>
 *   <li>location text</li>
 *   <li>ticket URL</li>
 * </ol>
 */
public class BVParser extends AsyncTask<Void, Void, String[][]> {
    static final String TAG_EVENT = "li.ds-entry";
    static final String TAG_TITLE = ".ds-entry-title";
    static final String TAG_LOCATION = ".location";
    static final String TAG_DATE_AND_TIME = ".ds-date";
    static final String TAG_TICKET_URL = ".ds-buy-tickets";
    static final String FEED_URL = "http://nyc-shows.brooklynvegan.com/";

    /** Maximum number of events stored in the result table. */
    private static final int MAX_EVENTS = 50;

    /** Number of columns stored per event. */
    private static final int COLUMN_COUNT = 6;

    /** Regex used to split the date element's text into tokens (a single space). */
    private static final String DELIM_SPACE = "[ ]";

    /**
     * Desktop browser User-Agent sent with the request.
     * NOTE(review): presumably the site serves different markup to Android's default
     * User-Agent, which would explain the selector matching nothing on device - confirm
     * by logging the fetched HTML.
     */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            + "(KHTML, like Gecko) Chrome/120.0 Safari/537.36";

    /** Words whose presence in a title marks the entry as "fake" (see {@link #checkFake}). */
    private static final String[] DAY_WORDS = {
        "Today", "Tomorrow", "Monday", "Tuesday", "Wednesday",
        "Thursday", "Friday", "Saturday", "Sunday"
    };

    Context context;
    Activity activity;

    /**
     * @param context the hosting activity; it is cast to {@code EventList} in
     *     {@link #onPostExecute}, and its application context is used for the toast and
     *     for building rows
     */
    public BVParser(Activity context) {
        this.context = context.getApplicationContext();
        this.activity = context;
    }

    /** Shows a "Fetching..." toast before the background work starts. */
    @Override
    protected void onPreExecute() {
        super.onPreExecute();
        Toast.makeText(context, "Fetching...", Toast.LENGTH_LONG).show();
    }

    /**
     * Runs {@link #getFeedItems()} and returns its table; if that call throws, the
     * exception is logged and an all-{@code null} table is returned instead.
     */
    @Override
    protected String[][] doInBackground(Void... param) {
        String[][] values = new String[MAX_EVENTS][COLUMN_COUNT];
        try {
            values = getFeedItems();
        } catch (IOException e) {
            Log.d("ASSERT", "Exception occured during doInBackground", e);
        }
        Log.d("ASSERT", ("values successfully returned by doInBackground, first title is: " + values[0][0]));
        return values;
    }

    /**
     * Wraps every row of {@code result} in a {@code ListRow} and hands the array to the
     * activity via {@code EventList.setList}.
     */
    @Override
    protected void onPostExecute(String[][] result) {
        super.onPostExecute(result);
        int eventCount = result.length;
        Log.d("ASSERT", ("event count in onPostExecute is: " + eventCount));
        ListRow[] listrow_data = new ListRow[eventCount];
        for (int i = 0; i < eventCount; i++) {
            // Rows coming from getFeedItems() are never null themselves, so unfilled rows
            // still become a ListRow whose text fields are null.
            // NOTE(review): confirm EventList/ListRow expect that.
            if (result[i] != null) {
                listrow_data[i] = new ListRow(context, result[i][0], result[i][1], result[i][2],
                        result[i][3], result[i][4], result[i][5], i);
            }
        }
        ((EventList) activity).setList(listrow_data);
    }

    /**
     * Downloads the feed page and extracts one row per event element.
     *
     * <p>Events whose title is flagged by {@link #checkFake(String)} are skipped. An event
     * without a title element is still recorded, with a {@code null} title. Rows are filled
     * consecutively from index 0; rows that are never filled contain only {@code null}s.
     * Events beyond the table capacity are ignored.
     *
     * <p>An {@link IOException} raised while fetching is caught and logged here, and
     * whatever was collected so far is returned.
     *
     * @return a 50x6 table; see the class documentation for the column layout
     * @throws IOException kept in the signature for existing callers
     */
    public String[][] getFeedItems() throws IOException {
        String[][] values = new String[MAX_EVENTS][COLUMN_COUNT];
        int i = 0;
        try {
            Log.d("ASSERT", "Made it to try block");
            // A timeout of 0 tells jsoup to wait indefinitely.
            Document doc = Jsoup.connect(FEED_URL)
                    .userAgent(USER_AGENT)
                    .timeout(0)
                    .get();
            Elements events = doc.select(TAG_EVENT);
            Log.d("ASSERT", "printing events, whatever it is: " + events);
            for (Element event : events) {
                // Stop before writing past the end of the fixed-size table.
                if (i >= values.length) {
                    break;
                }
                Log.d("ASSERT", "Made it to getFeedItems's main for loop");

                // Title: dereference only after the null check.
                Element title = event.select(TAG_TITLE).first();
                if (title != null) {
                    String titleString = title.text();
                    Log.d("ASSERT", "This title is: " + titleString);
                    if (checkFake(titleString)) {
                        continue;
                    }
                    values[i][0] = titleString;
                }

                // Date and time: take tokens 1 and 3 only when they exist.
                Element dateAndTime = event.select(TAG_DATE_AND_TIME).first();
                if (dateAndTime != null) {
                    String[] tokens = dateAndTime.text().split(DELIM_SPACE);
                    if (tokens.length > 1) {
                        values[i][1] = tokens[1];
                    }
                    if (tokens.length > 3) {
                        values[i][2] = tokens[3];
                    }
                }

                // Default price label; replaced with "Free" below when there is no ticket box.
                values[i][3] = "See Ticket";

                Element location = event.select(TAG_LOCATION).first();
                if (location != null) {
                    values[i][4] = location.text();
                }

                Element ticketContainer = event.select(TAG_TICKET_URL).first();
                if (ticketContainer != null) {
                    values[i][5] = ticketContainer.select("a").attr("href");
                } else {
                    values[i][3] = "Free";
                }
                i++;
            }
        } catch (IOException e) {
            // Pass the exception to the logger so the stack trace appears in logcat.
            Log.d("ASSERT", "Exception during getFeedItems", e);
        }
        Log.d("ASSERT", "The first title in getFeedItems before returning is: " + values[0][0]);
        return values;
    }

    /**
     * Reports whether a title contains "Today", "Tomorrow" or an English weekday name
     * (case-sensitive substring match). Such entries are presumably day headings rather
     * than real events.
     *
     * @param s the title text; must not be {@code null}
     * @return {@code true} if any of the day words occurs in {@code s}
     */
    private static boolean checkFake(String s) {
        for (String day : DAY_WORDS) {
            if (s.contains(day)) {
                return true;
            }
        }
        return false;
    }
}
调试尝试:
我在整个代码中添加了日志语句以调试问题。如果你运行它,你会发现问题似乎发生在 getFeedItems() 本身的某个地方,特别是在“try”块中。尽管 try 块开头的日志语句打印出来了,但遍历 events 的 for 循环根本没有运行,因为循环开头的日志语句从未打印。
问题:
有人可以解释为什么遍历 events 的循环没有开始吗?events 是否为空?如果是,为什么?为什么这个方法独立运行时与在我的 AsyncTask 中运行时会有差异?我已经快抓狂了。此解析器中的逻辑几乎与我编写的其他(能正常工作的)解析器中的逻辑相同,但是它返回的却是一个只有空值的二维数组。我甚至无从理解逻辑错误可能出在哪里,也找不到任何拼写错误。
PS: 如果将此与我的其他解析器进行比较会有所帮助,请告诉我,我会发布源代码。提前致谢。