这是我第一次尝试编写“真正的”C# 程序。它接收一个指定的目录,提取其中的文件名(不带扩展名)并将它们写入 SQL 数据库。然后将这些数据读回一个数组,并传入下面的“foreach”循环。循环使用这些数据搜索 IMDB,并将第一个结果的 URL 存入数据库。随后程序把该 URL 读回一个变量,并用它从页面中“抓取”数据,例如导演、演员、情节等。
我让程序在最后直接用导演、演员、情节等数据更新数据库。我已经单步调试过程序,所有变量都包含正确的值;但是当窗体在 DataGrid 中加载该表时,它只显示我之前在循环中添加的数据,却不显示导演等字段。
由于这些原因,我认为程序末尾的 SQL 语句可能有错误。我知道代码可能效率低下且比较混乱,但我在这方面还是新手,所以请手下留情!
// For every title: search IMDB, store the first result's page URL in movieTable,
// scrape director/cast/plot/rating/release date/cover URL from that page, and
// write those values back to the row whose title matches.
//
// NOTE: the scraping relies on fixed text markers and character offsets. Every
// IndexOf call below is assumed to find its marker; if one returns -1 the
// following Substring call throws ArgumentOutOfRangeException.
foreach (string title in titles)
{
    // Use the title to search IMDB. The title is escaped so that characters such
    // as '&', '#', '+' or spaces in a file name cannot corrupt the query string.
    string searchURL = "http://www.imdb.com/find?s=all&q=" + System.Uri.EscapeDataString(title);
    string searchSource = WorkerClass.getSourceCode(searchURL);

    // Take the first <a href="..."> that follows the "Media from " marker.
    // +9 skips past `<a href="` (8 characters plus the opening quote).
    string searchResults = searchSource.Substring(searchSource.IndexOf("Media from "));
    int startIndex = searchResults.IndexOf("<a href=") + 9;
    int endIndex = searchResults.IndexOf("\" onclick", startIndex);
    string link = "http://www.imdb.com" + searchResults.Substring(startIndex, endIndex - startIndex);

    // Store the page URL for this title, then read it back from the table.
    // The using blocks close and dispose the connection/commands even if a
    // command throws.
    string pageURL;
    using (SqlConnection con = new SqlConnection(DataAccess.GetConnectionString("dbCon")))
    {
        con.Open();

        using (var command = new SqlCommand("UPDATE movieTable SET imdbPageURL=@pageURL WHERE title=@title", con))
        {
            command.Parameters.AddWithValue("@pageURL", link);
            command.Parameters.AddWithValue("@title", title);
            command.ExecuteNonQuery();
        }

        using (var com = new SqlCommand("SELECT imdbPageURL FROM movieTable WHERE title=@title", con))
        {
            com.Parameters.AddWithValue("@title", title);
            pageURL = (string)com.ExecuteScalar();
        }
    }

    // Download the movie page once; every field below is cut out of this same
    // text, so there is no need to fetch it again for each field.
    string pageSource = WorkerClass.getSourceCode(pageURL);

    // Get Director: text after the first `content=` that follows "description",
    // starting 21 characters in and ending at the next '.'.
    string section = pageSource.Substring(pageSource.IndexOf("description"));
    startIndex = section.IndexOf("content=") + 21;
    endIndex = section.IndexOf('.', startIndex);
    string director = section.Substring(startIndex, endIndex - startIndex);

    // Get Cast: from the first `content=` on the page, the text between the
    // first '.' (+2) and three characters before the next "/>".
    section = pageSource.Substring(pageSource.IndexOf("content="));
    startIndex = section.IndexOf('.') + 2;
    endIndex = section.IndexOf("/>", startIndex) - 3;
    string cast = section.Substring(startIndex, endIndex - startIndex);

    // Get Plot: inside the region between the first "</div>" after "Users:" and
    // the next "<div", take the contents of the <p> element (+7 offset).
    section = pageSource.Substring(pageSource.IndexOf("Users:"));
    startIndex = section.IndexOf("</div>");
    endIndex = section.IndexOf("<div", startIndex);
    section = section.Substring(startIndex, endIndex - startIndex);
    startIndex = section.IndexOf("<p>") + 7;
    endIndex = section.IndexOf("</p>");
    string plot = section.Substring(startIndex, endIndex - startIndex);

    // Rating and release date are both located relative to the "infobar" marker.
    string infobar = pageSource.Substring(pageSource.IndexOf("infobar"));

    // Get Rating: value of the first alt="..." attribute (up to 2 chars before `src=`).
    startIndex = infobar.IndexOf("alt=") + 5;
    endIndex = infobar.IndexOf("src=", startIndex) - 2;
    string rating = infobar.Substring(startIndex, endIndex - startIndex);

    // Get Release Date: within the "nobr" ... "</div>" region, the text between
    // "dates" (+11) and four characters before "</a".
    startIndex = infobar.IndexOf("nobr");
    endIndex = infobar.IndexOf("</div>", startIndex);
    section = infobar.Substring(startIndex, endIndex - startIndex);
    startIndex = section.IndexOf("dates") + 11;
    endIndex = section.IndexOf("</a") - 4;
    string releaseDate = section.Substring(startIndex, endIndex - startIndex);

    // Get link to Cover Image: the src of the first <img> after "img_primary",
    // up to and including ".jpg".
    section = pageSource.Substring(pageSource.IndexOf("img_primary"));
    startIndex = section.IndexOf("<img src=") + 10;
    endIndex = section.IndexOf(".jpg", startIndex) + 4;
    string coverURL = section.Substring(startIndex, endIndex - startIndex);

    // Update movieTable with the scraped data for the current title.
    using (SqlConnection con = new SqlConnection(DataAccess.GetConnectionString("dbCon")))
    using (var comd = new SqlCommand("UPDATE movieTable SET director=@director, cast=@cast, plot=@plot, rating=@rating, releaseDate=@releaseDate, coverURL=@coverURL WHERE title=@title", con))
    {
        comd.Parameters.AddWithValue("@title", title);
        comd.Parameters.AddWithValue("@director", director);
        comd.Parameters.AddWithValue("@cast", cast);
        comd.Parameters.AddWithValue("@plot", plot);
        comd.Parameters.AddWithValue("@rating", rating);
        comd.Parameters.AddWithValue("@releaseDate", releaseDate);
        comd.Parameters.AddWithValue("@coverURL", coverURL);
        con.Open();
        // Execute THIS command. The original executed `command` (the page-URL
        // update) here, so the scraped columns were never written to the table.
        comd.ExecuteNonQuery();
    }
}
// Reload the table into the data set so the grid shows the updated rows.
this.movieTableTableAdapter.Fill(this.movieLibraryDBDataSet.movieTable);