我想抓取每个子链接页面中的信息，但程序运行时报错。下面是我的完整配置文件。错误原因（Caused by）如下：org.xml.sax.SAXParseException；行号：6；列号：724；元素类型 "t.length" 必须后跟属性规范、">" 或 "/>"。
<?xml version="1.0" encoding="UTF-8"?>
<config>
<!-- Download the business landing page and run it through html-to-xml so
     that the result stored in "webpage" can be queried with XPath. -->
<var-def name="webpage">
  <html-to-xml>
    <http url="http://www.thestar.com.my/business/"/>
  </html-to-xml>
</var-def>
<!--
  Walk over every teaser container found on the landing page. On each pass
  the current container is available as "TheStarBiz" and the loop counter
  as "i". For each container the title and link are extracted, the linked
  page is downloaded, and the results are appended to a text file.
-->
<loop item="TheStarBiz" index="i">
  <list>
    <xpath expression="//div[@class='nine columns mobile3']">
      <var name="webpage"/>
    </xpath>
  </list>
  <body>
    <!-- Headline text of the teaser. -->
    <var-def name="title">
      <xpath expression="(//p[@class='m'])/a/text()">
        <var name="TheStarBiz"/>
      </xpath>
    </var-def>

    <!-- Link target of the teaser, as written to the output file. -->
    <var-def name="link">
      <xpath expression="//p[@class='m']/a/@href">
        <var name="TheStarBiz"/>
      </xpath>
    </var-def>

    <!-- The same href, obtained through XQuery, used as the URL to fetch.
         NOTE(review): if a container holds several matching anchors this
         yields more than one value; confirm against the live page. -->
    <var-def name="new_url">
      <xquery>
        <xq-param name="TheStarBiz"><var name="TheStarBiz"/></xq-param>
        <xq-expression><![CDATA[
          declare variable $TheStarBiz as node() external;
          let $url := data($TheStarBiz//p[@class='m']/a/@href)
          return
            $url
        ]]></xq-expression>
      </xquery>
    </var-def>

    <!-- Raw response body of the linked page (plain HTML, not XML). -->
    <var-def name="new_page_content">
      <http url="${new_url}"/>
    </var-def>

    <!-- The raw page must be converted with html-to-xml before XPath can
         read it. Feeding the unconverted HTML to xpath makes the XML parser
         choke on inline JavaScript such as "i<t.length" and raises
         org.xml.sax.SAXParseException (element type "t.length" must be
         followed by attribute specifications). -->
    <var-def name="fulldesc">
      <xpath expression="//div[@class='story']">
        <html-to-xml>
          <var name="new_page_content"/>
        </html-to-xml>
      </xpath>
    </var-def>

    <!-- Append one record per teaser to the output file. The template text
         is deliberately left unindented: its whitespace is written to the
         file as-is.
         NOTE(review): the template writes the raw page; swap in ${fulldesc}
         if only the story block is wanted. -->
    <var-def name="textfile">
      <file action="append" type="text" path="C:\Users\jacey\Desktop\WebHarvest\test.txt">
        <template>
${title} ${sys.cr}${sys.lf}
${link} ${sys.cr}${sys.lf}
${new_page_content} ${sys.cr}${sys.lf}
</template>
      </file>
    </var-def>
  </body>
</loop>
</config>