1

我想从 HTML 网页中选择一些项目并将它们放入列表中。每个项目都将是此类的一个实例:

/// <summary>
/// One product scraped from the shop page: its display name, the URL of its
/// image and its price.
/// </summary>
public class shopItem
{
    private String itemName;
    private String itemImageLink;
    private Double itemPrice;

    /// <summary>
    /// Creates an item with no name, no image link and a price of zero.
    /// Declared explicitly because adding the parameterized constructor below
    /// removes the compiler-generated default one, and <c>new shopItem()</c>
    /// must keep compiling.
    /// </summary>
    public shopItem()
    {
    }

    /// <summary>
    /// Creates a fully populated item.
    /// </summary>
    /// <param name="itemName">Display name of the product.</param>
    /// <param name="itemImageLink">URL of the product image.</param>
    /// <param name="itemPrice">Price of the product.</param>
    // A constructor has no return type: the former "public void shopItem(...)"
    // declared an ordinary method named after its enclosing type, which does
    // not compile and cannot be invoked through "new shopItem(...)".
    public shopItem(String itemName, String itemImageLink, Double itemPrice)
    {
        this.itemName = itemName;
        this.itemImageLink = itemImageLink;
        this.itemPrice = itemPrice;
    }

    /// <summary>Returns the product's display name.</summary>
    public String getItemName()
    {
        return this.itemName;
    }

    /// <summary>Returns the URL of the product image.</summary>
    public String getItemImageLink()
    {
        return this.itemImageLink;
    }

    /// <summary>Returns the product's price.</summary>
    public Double getItemPrice()
    {
        return this.itemPrice;
    }
}

HTML是这样的:

<div class="list_categorie_product">

<!-- Products list -->
<ul id="product_list_grid" class="categorie_product clear">

    </li>
        <li class="ajax_block_product  alternate_item clearfix">
      <p>
      <a href="http://thefrogco.com/polos/12-polo-2.html" class="product_img_link" title="Gris-Burdeos">
       <img src="http://thefrogco.com/12-111-large/polo-2.jpg" alt="Gris-Burdeos"  width="174" height="261" />
      </a>
      </p>
      <h3>
         <a href="http://thefrogco.com/polos/12-polo-2.html" class="product_link" title="Gris-Burdeos">Gris-Burdeos</a>
        </h3>
      <p id="p1">
       <!--<span class="new_product">

       </span>-->

                 <span class="new_product">

                                                 <span class="price"><!--<strike>30,00 €</strike>--><br />24,00 €</span>

       </span>

                 </p>


    </li>
        <li class="ajax_block_product  item clearfix">
      <p>
      <a href="http://thefrogco.com/polos/14-polo-4.html" class="product_img_link" title="Blanco-Marino">
       <img src="http://thefrogco.com/14-114-large/polo-4.jpg" alt="Blanco-Marino"  width="174" height="261" />
      </a>
      </p>
      <h3>
         <a href="http://thefrogco.com/polos/14-polo-4.html" class="product_link" title="Blanco-Marino">Blanco-Marino</a>
        </h3>
      <p id="p2">
       <!--<span class="new_product">

       </span>-->

                 <span class="new_product">

                                                 <span class="price"><!--<strike>30,00 €</strike>--><br />24,00 €</span>

       </span>

                 </p>


    </li>
        <li class="ajax_block_product last_item alternate_item clearfix">
      <p>
      <a href="http://thefrogco.com/polos/15-marron-turquesa.html" class="product_img_link" title="Marr&oacute;n-Turquesa">
       <img src="http://thefrogco.com/15-126-large/marron-turquesa.jpg" alt="Marr&oacute;n-Turquesa"  width="174" height="261" />
      </a>
      </p>
      <h3>
         <a href="http://thefrogco.com/polos/15-marron-turquesa.html" class="product_link" title="Marr&oacute;n-Turquesa">Marr&oacute;n-Turquesa</a>
        </h3>
      <p id="p3">
       <!--<span class="new_product">

       </span>-->

                 <span class="new_product">

                                                 <span class="price"><!--<strike>30,00 €</strike>--><br />24,00 €</span>

       </span>

                 </p>


    </li>
    </ul>

如您所见,我想把每件 Polo 衫都保存为一个对象并放入列表。我使用的是 HtmlAgilityPack,但不知道该如何选取这些节点。下面是我目前写出的代码:

// Asker's work-in-progress attempt: collect one shopItem per product <li> into a list.
// The snippet is incomplete and does not compile as written.
List<shopItem> itemsList = new List<shopItem>();

        HtmlDocument htmlDocument = new HtmlDocument();

        // NOTE(review): the argument is a URL, but the method name suggests it parses
        // HTML markup supplied as a string - confirm against the HtmlAgilityPack docs;
        // the page presumably has to be downloaded first.
        htmlDocument.LoadHtml("http://thefrogco.com/14-polos");

        // NOTE(review): an XPath expression is passed to Elements(); presumably Elements()
        // expects a plain tag name and SelectNodes() is the XPath entry point - verify.
        // The exact-match class predicate fits only the <li> whose class attribute is this
        // whole string (the last product in the sample markup), and in that markup the
        // <li> elements sit under a <ul>, not directly under a <div>.
        foreach (HtmlNode selectNode in htmlDocument.DocumentNode.Elements("//div/div/li[@class='ajax_block_product last_item alternate_item clearfix']")) 
        {
            // Unfinished inner loop: the type name is misspelled and no loop variable or
            // collection is given.
            foreach(HtmlNde)
            {
            //I suppose i have to iterate all inside nodes...
            }
            // The item is created without a name, image link or price, and is never
            // added to the list.
            shopItem detectedItem = new shopItem(); 
            // Incomplete expression: the member access after the dot is missing.
            itemsList.Add(selectNode.);
        }

太感谢了!

4

1 回答 1

1

可以像下面这样写:

// Loads the saved page and appends one shopItem per product <li> to itemsList.
HtmlDocument doc = new HtmlDocument();
doc.Load(myDocHtm);

// Prices in the sample markup look like "24,00 €" (comma as decimal separator, trailing
// euro sign). Parsing them with the machine's current culture fails or misreads the
// value on cultures that use a decimal point, so a Spanish culture is used explicitly.
CultureInfo priceCulture = CultureInfo.GetCultureInfo("es-ES");

// get all LI elements with a CLASS attribute that starts with 'ajax_block_product'
// SelectNodes yields null rather than an empty collection when nothing matches,
// so it must be checked before it is enumerated.
HtmlNodeCollection productNodes = doc.DocumentNode.SelectNodes("//li[starts-with(@class,'ajax_block_product')]");
if (productNodes != null)
{
    foreach (HtmlNode selectNode in productNodes)
    {
        // from the current node, get recursively the first A element with a CLASS attribute set to 'product_link'
        HtmlNode name = selectNode.SelectSingleNode(".//a[@class='product_link']");

        // from the current node, get recursively the first IMG element with a non empty SRC attribute
        HtmlNode img = selectNode.SelectSingleNode(".//img[@src]");

        // from the current node, get recursively the first SPAN element with a CLASS attribute set to 'price'
        // and get the child text node from it
        HtmlNode price = selectNode.SelectSingleNode(".//span[@class='price']/text()");

        // Skip list entries that lack any of the three parts instead of failing
        // with a NullReferenceException on the first incomplete product.
        if (name == null || img == null || price == null)
        {
            continue;
        }

        // InnerText keeps HTML entities verbatim (e.g. "Marr&oacute;n-Turquesa"),
        // so decode them before storing or parsing the text.
        string priceText = HtmlEntity.DeEntitize(price.InnerText).Trim();

        shopItem item = new shopItem(
            HtmlEntity.DeEntitize(name.InnerText),
            img.GetAttributeValue("src", null),
            // Still throws FormatException on a malformed price, as before.
            double.Parse(priceText, NumberStyles.Any, priceCulture)
            );
        itemsList.Add(item);
    }
}
于 2013-04-23T21:09:07.260 回答