我用 C# 编写了一个爬虫程序,它用于在给定的 url 或带有页码后缀的 url 上爬取并从中下载所有图像文件。它工作得很好。现在我是 android 编程的新手,我想为我的 android 设备写同样的东西,这样我也可以在我的手机上使用它。
我遵循的算法是......
1) 获取基本 url、起始页码(如果 url 在查询字符串中以页码后缀)、结束页码和存储图像在 sdcard 上的位置。
2) 如果结束页码小于起始页码(即我只想抓取单个页面),就把基本 url 直接传递给 getHtml 方法;否则从起始页到结束页循环,把每一页的 url 依次传递给 getHtml 方法。
3) 在 getHtml 方法中,我下载了网页源并将其分成几部分以在其中找到指向图像文件的链接。
4) 对于找到的每个图像 url,将图像文件下载到给定的保存位置。
算法看起来很简单,但整个程序写完之后,我遇到了相当严重的性能问题。它非常吃资源,在模拟器中运行时,我在 logcat 里看到的几乎全是 GC 回收对象的日志。另一个很常见的问题是 UI 卡死。不过这个程序仅供个人使用,而且我还不了解 Android 中的多线程,所以 UI 卡死我可以忍受,但程序本身至少应该运行得快一些。有没有办法减少对象的数量,或者手动销毁它们?
我能做些什么来改善这一点?我想知道那些上 GB 的游戏是如何流畅运行的,而我这个 20KB 的应用却如此缓慢。
完整代码如下:
package com.dwnldr;
import java.io.*;
import java.net.*;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import android.app.Activity;
import android.os.*;
import android.util.Log;
import android.view.View;
import android.widget.*;
public class IMGDwnldrActivity extends Activity {
EditText res;
String str;
@Override
public void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.main);
Button btn = (Button) findViewById(R.id.button1);
File SDCardRoot = Environment.getExternalStorageDirectory();
EditText saveat = (EditText) findViewById(R.id.editText4);
saveat.setText(SDCardRoot.getAbsolutePath());
btn.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
try {
//Getting URL,Start pageno,End pageno and the save location on sdcard
EditText baseurl = (EditText) findViewById(R.id.editText1);
String url = baseurl.getText().toString();
EditText startpage = (EditText) findViewById(R.id.editText2);
int start = Integer.parseInt(startpage.getText().toString());
EditText endpage = (EditText) findViewById(R.id.editText3);
int end = Integer.parseInt(endpage.getText().toString());
EditText saveat = (EditText) findViewById(R.id.editText4);
String save = saveat.getText().toString();
if (start <= end) {
for (int i = start; i <= end; i++) {
str = "\n--------------------";
str += "\nPage No" + String.valueOf(i);
writemsg(str);
getHtml(url + String.valueOf(i), save);
}
} else
getHtml(url, save);
writemsg("Done");
} catch (Exception ee) {
writemsg("\nException fired::" + ee.getMessage());
}
}
});
}
//method to get the source of a particular url
public void getHtml(String url, String save) throws ClientProtocolException, IOException {
try {
HttpClient httpClient = new DefaultHttpClient();
HttpContext localContext = new BasicHttpContext();
HttpGet httpGet = new HttpGet(url);
HttpResponse response = httpClient.execute(httpGet, localContext);
String result = "";
str = "\nDownloading Page....";
writemsg(str);
BufferedReader reader = new BufferedReader(new InputStreamReader(response.getEntity().
getContent()));
String line = null;
while ((line = reader.readLine()) != null) {
result += line + "\n";
}
str = "\nPage Downloaded...";
writemsg(str);
String[] pieces;
if (result.contains(".jpg") || result.contains(".jpeg")) {
pieces = result.split("\"");
Log.d("Events", String.valueOf(pieces.length));
for (int i = 0; i < pieces.length; i++) {
if (pieces[i].contains(".jpg") || pieces[i].contains(".jpeg")) {
if (pieces[i].contains("http")) {
Log.d("Events", pieces[i]);
downloadme(pieces[i], save);
} else {
URL u = new URL(url);
if (pieces[i].startsWith("."));
pieces[i] = pieces[i].substring(pieces[i].indexOf("/"), pieces[i].length());
writemsg(u.getProtocol() + "://" + u.getHost() + pieces[i]);
if (pieces[i].startsWith("/"))
downloadme(u.getProtocol() + "://" + u.getHost() + pieces[i], save);
else
downloadme(u.getProtocol() + "://" + u.getHost() + "/" + pieces[i], save);
}
}
}
}
} catch (Exception ee) {
writemsg("\nException fired::" + ee.getMessage());
}
}
//download each image url given
private void downloadme(String url1, String save) {
try {
str = "\nDownloading Image " + url1;
writemsg(str);
URL url = new URL(url1);
HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
urlConnection.setRequestMethod("GET");
urlConnection.setDoOutput(true);
urlConnection.connect();
File f = new File(save);
if (f.isDirectory() && !f.exists())
f.mkdirs();
String fileName = url1.substring(url1.lastIndexOf('/') + 1, url1.length());
File file = new File(save, fileName);
FileOutputStream fileOutput = new FileOutputStream(file);
InputStream inputStream = urlConnection.getInputStream();
int totalSize = urlConnection.getContentLength();
str = "\nImage Size " + String.valueOf(totalSize / 1024);
writemsg(str);
byte[] buffer = new byte[1024];
int bufferLength = 0; //used to store a temporary size of the buffer
while ((bufferLength = inputStream.read(buffer)) > 0) {
fileOutput.write(buffer, 0, bufferLength);
}
fileOutput.close();
str = "\nDownloaded Image " + url1;
writemsg(str);
catch some possible
errors // ...
} catch (MalformedURLException e) {
writemsg("\nException fired::" + e.getMessage());
} catch (IOException e) {
writemsg("\nException fired::" + e.getMessage());
} catch (Exception ee) {
writemsg("\nException fired::" + ee.getMessage());
}
}
//write certain text to the Result textbox
private void writemsg(String msg) {
res = (EditText) findViewById(R.id.result);
String str = res.getText().toString();
str += msg;
res.setText(str);
}
}