代码可以比我更好地解释这个问题。我还包括了我尝试过的替代方法。如果可能,请解释为什么这些其他方法也不起作用。我的想法已经用完了,遗憾的是 HtmlAgilityPack 的示例并不多。不过,我目前正在浏览文档以寻找更多想法。
我注意到的一件事是 .nextSibling 属性,我想我可以使用 while 循环来遍历表单,直到找不到下一个兄弟姐妹或表单结尾。
无论如何,这是代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
using System.Collections;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
string source = @"
<form name='form1' action='action1' method='method1' id='id1'>
<input type='text1.1' name='name1.1' value='value1.1' />
<input type='text1.2' name='name1.2' value='value1.2' />
</form>
<form name='form2' action='action2' method='method2' id='id2'>
<input type='text2.1' name='name2.1' value='value2.1' />
<input type='text2.2' name='name2.2' value='value2.2' />
</form>
";
List<HtmlAttribute> formAttributes = new List<HtmlAttribute>();//this is what i'm wanting to get for the current form.
/**
* I want to end up with a list that has
* Name: type Value: text1.1
* Name: name Value: 1.1
* Name: value Value: value1.1
* Name: type Value: text1.2
* Name: name Value: name1.2
* Name: value Value: value1.2
* but I am ending up with the 2nd forms values as well
* */
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(source);
var forms = htmlDoc.DocumentNode.Descendants("form");
foreach (var form in forms)
{
Console.WriteLine(form.Attributes[0].Value); //simple writes the form name to the console to keep track of things
HtmlNodeCollection inputs = form.SelectNodes("/input"); // gets all the inputs in the selected form, or so I thought. This is where the problem lies. Result: Shows both forms inputs.
//HtmlNodeCollection inputs = form.SelectNodes("//input"); // not the best at xpath, but perhaps this could make a difference? Result: no difference
//var inputs = form.Elements("input"); // Maybe the inputs are referred to as elements? Result: shows no input outerhtml at all.
foreach (var input in inputs) //this has all 4 inputs from both forms. I only want it to have 2 inputs from the selected form.
{
Console.WriteLine(input.OuterHtml);
List<HtmlAttribute> attributes = new List<HtmlAttribute>();
attributes = input.Attributes.ToList<HtmlAttribute>();
foreach (var att in attributes)
{
//add attributes to allattributes list code that will be done once problem of getting only inputs for specified form is fixed
}
}
// here comes an alternate method! Edit: Didn't work :'(
//var inputs = form.Descendants("input"); // perhaps using the "Descendants class will make a difference. Result: Nope, didn't have any items at all!
//IEnumerator e = inputs.GetEnumerator();
//while (e.MoveNext())
//{
// Console.WriteLine("input: " + e.Current);
//}
Console.WriteLine(); // Simply making everything look pretty with a newline after each form name/input outerhtml display.
}
Console.Read();
}
}
}