我编写了一个网页抓取程序,用来访问一个页面列表并将所有 HTML 写入文件。问题是,当我抓取一段文本时,某些字符会被写成“�”。如何才能让这些字符正确地写入我的文本文件?这是我的代码:
// Downloads one gallery page and appends its HTML to C:\Share\<id>.txt.
// Reads `id` and `name` from the enclosing scope.
string baseUri = String.Format("http://www.rogersmushrooms.com/gallery/loadimage.asp?did={0}&blockName={1}", id.ToString(), name.Trim());

// Plain GET request that presents a browser-like User-Agent.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(baseUri);
request.Method = "GET";
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)";

string page;

// The using blocks release the response and reader even when reading throws.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
    // A StreamReader built without an encoding always decodes as UTF-8, so
    // bytes from a page served in another charset turn into U+FFFD ("�").
    // Decode with the charset the response reports instead; keep UTF-8 as the
    // fallback when no usable charset is reported.
    System.Text.Encoding pageEncoding = System.Text.Encoding.UTF8;
    string charset = response.CharacterSet;
    if (!String.IsNullOrEmpty(charset))
    {
        try
        {
            // Some servers quote the charset value; strip quotes and whitespace.
            pageEncoding = System.Text.Encoding.GetEncoding(charset.Trim().Trim('"', '\''));
        }
        catch (ArgumentException)
        {
            // Unknown or malformed charset name: stay on the UTF-8 fallback.
        }
    }

    // NOTE(review): if the server reports the wrong charset, the text will
    // still be garbled and the encoding must be chosen explicitly here.
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), pageEncoding))
    {
        page = reader.ReadToEnd();
    }
}

string filename = string.Format("{0}.txt", id.ToString());

// Disposing the writer flushes its buffer and releases the file handle;
// without it the appended text may never reach the file.
using (StreamWriter SW = File.AppendText("C:\\Share\\" + filename))
{
    SW.Write(page);
}