I am working on an app that searches for email addresses in Google search results' URLs. The problem is it needs to return the value it found in each page + the URL in which it found the email, to a datagridview with 2 columns: Email and URL. I am using Parallel.ForEach for this one but of course it returns random URLs and not the ones it really found the email on.
public static string htmlcon; //htmlsource
public static List<string> emailList = new List<string>();
public static string Get(string url, bool proxy)
{
htmlcon = "";
try
{
HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
if (proxy)
req.Proxy = new WebProxy(proxyIP + ":" + proxyPort);
req.Method = "GET";
req.UserAgent = Settings1.Default.UserAgent;
if (Settings1.Default.EnableCookies == true)
{
CookieContainer cont = new CookieContainer();
req.CookieContainer = cont;
}
WebResponse resp = req.GetResponse();
StreamReader SR = new StreamReader(resp.GetResponseStream());
htmlcon = SR.ReadToEnd();
Thread.Sleep(400);
resp.Close();
SR.Close();
}
catch (Exception)
{
Thread.Sleep(500);
}
return htmlcon;
}
private void copyMails(string url)
{
string emailPat = @"(\b[a-zA-Z0-9._%-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b)";
MatchCollection mailcol = Regex.Matches(htmlcon, emailPat, RegexOptions.Singleline);
foreach (Match mailMatch in mailcol)
{
email = mailMatch.Groups[1].Value;
if (!emailList.Contains(email))
{
emailList.Add(email);
Action dgeins = () => mailDataGrid.Rows.Insert(0, email, url);
mailDataGrid.BeginInvoke(dgeins);
}
}
}
private void SEbgWorker_DoWork(object sender, DoWorkEventArgs e)
{
//ALOT OF IRRELEVAMT STUFF BEING RUN
Parallel.ForEach(allSElist.OfType<string>(), (s) =>
{
//Get URL
Get(s, Settings1.Default.Proxyset);
//match mails 1st page
copyMails(s);
});
}
so this is it: I execute a Get request(where "s" is the URL from the list) and then execute copyMails(s) from the URL's html source. It uses regex to copy the emails. If I do it without parallel it returns the correct URL for each email in the datagridview. How can I do this parallel an still get the correct match in the datagridview?
Thanks