正如你所说,这里必须使用一些技巧。
// Walk every table row, strip the elements that the row's own <style> block
// hides with display:none, then read the proxy address and port from the cells.
// HtmlAgilityPack's SelectNodes returns null (not an empty collection) when
// nothing matches, so the result is checked before iterating.
var rows = doc.DocumentNode.SelectNodes("//tr");
if (rows != null) {
    foreach (var tr in rows) {
        var style = tr.SelectSingleNode(".//style");
        if (style == null) {
            // Rows without an embedded <style> block have no selectors to parse; skip them.
            continue;
        }

        // Find the classes and ids declared with {display:none}.
        // Group 1 is the selector prefix ('.' or '#'), group 2 is the selector name.
        var matches = Regex.Matches(style.InnerText, @"(\.|#)(.+?)\s*{\s*display\s*:\s*none");

        // Classes and ids whose elements must be removed from this row.
        List<string> classes = new List<string>();
        List<string> ids = new List<string>();
        foreach (Match m in matches) {
            var type = m.Groups[1].Value;
            if (type == ".") {
                classes.Add(m.Groups[2].Value);
            }
            else {
                ids.Add(m.Groups[2].Value);
            }
        }

        // The row contains at least the <style> element found above, so this
        // query cannot come back null here.
        foreach (var n in tr.SelectNodes(".//*")) {
            if (Remove(n, classes, ids)) {
                n.Remove();
            }
        }

        var proxyNode = tr.SelectSingleNode("./td[2]/span");
        var portNode = tr.SelectSingleNode("./td[3]");
        if (proxyNode == null || portNode == null) {
            // The row does not have the expected cell layout; nothing to extract.
            continue;
        }

        var proxy = proxyNode.InnerText;
        var port = portNode.InnerText.Trim('\r', '\n', ' ');
        // proxy and port are ready to be consumed here (e.g. combined as "proxy:port").
    }
}
并配合使用以下方法:
/// <summary>
/// Decides whether a node should be removed: every &lt;style&gt; element, and any
/// &lt;span&gt; or &lt;div&gt; that is hidden inline or matches one of the given
/// classes or ids.
/// </summary>
/// <param name="x">The node to test.</param>
/// <param name="classes">Class names whose elements must be removed.</param>
/// <param name="ids">Element ids whose elements must be removed.</param>
/// <returns><c>true</c> when the node should be removed; otherwise <c>false</c>.</returns>
static bool Remove(HtmlNode x, IList<string> classes, IList<string> ids) {
    // <style> elements are always removed.
    if (x.Name == "style") {
        return true;
    }

    // Only <span> and <div> elements are candidates for the remaining checks.
    // (The original compared the tag name with "id" in the id branch, so <div>
    // elements matched by an id selector were never removed.)
    if (x.Name != "span" && x.Name != "div") {
        return false;
    }

    // The inline style is compared exactly; the class attribute is matched as a
    // whole string against the list.
    return x.GetAttributeValue("style", "") == "display:none"
        || classes.Contains(x.GetAttributeValue("class", ""))
        || ids.Contains(x.GetAttributeValue("id", ""));
}
您可以在 `Remove` 方法中添加更多过滤条件。由于您编写的是 C# 代码,因此可以在该方法中检查任何您需要的内容,而不仅限于 XPath 表达式所能表达的条件。
该代码给了我以下代理列表:
190.199.36.220:8080
177.139.137.107:3128
103.247.23.90:8080
222.124.130.203:8080
14.140.241.242:8080
175.103.37.10:8080
110.77.183.113:3128
54.243.51.203:8118
200.90.179.90:8080
213.152.173.137:8080
187.17.212.162:8080
62.201.207.14:8080
77.123.76.178:8080
189.76.212.254:3128
89.218.224.234:9090
221.179.173.170:8080
187.84.56.42:3128
118.99.79.13:8080
211.86.157.110:3128
189.38.3.122:3128
2.135.238.178:9090
2.135.238.2:9090
122.50.38.128:3128
217.11.185.251:3128
82.200.254.2:9090
37.59.82.253:8080
83.111.38.131:3128
85.118.227.76:3128
182.30.249.13:8080
124.88.154.3:6673
111.13.87.150:80
190.85.37.90:8080
219.117.232.133:3128
211.100.47.138:8990
46.32.21.195:8080
107.18.121.126:8080
118.97.191.203:8080
119.195.32.211:3128
2.133.92.242:9090
202.164.217.18:8080
222.124.214.194:3128
79.140.17.253:3128
61.138.104.30:1080
201.45.116.138:3128
190.98.209.168:3128
190.204.222.183:8080
200.199.173.122:3128
197.159.16.58:8080
223.4.233.164:3128
212.93.195.229:3128