在我看来,基于状态机的 XML 解析器(即 SAX 解析器)最适合解决这个问题。下面是一个例子:
/**
 * Collects the text of <p> elements that appear in uninterrupted runs.
 *
 * The file is streamed through PHP's event-based XML parser. Every <p> start
 * tag extends the current run; the start tag of any other element ends it.
 * A run that reached at least $minimum_in_succession paragraphs is appended
 * to $successive_element_data as a list of paragraph texts.
 *
 * Element names are compared against 'P' because the parser upper-cases
 * names while case folding is enabled, which is its default setting.
 */
class StateHelper {
    /** Number of <p> start tags seen in the current run. */
    public $p_count = 0;

    /** Paragraph texts gathered for the current run. */
    public $p_elements = array();

    /** TRUE while the parser is between a <p> start tag and its end tag. */
    public $in_p = FALSE;

    /** Smallest run length that gets recorded. */
    public $minimum_in_succession = 2;

    /** Recorded runs: a list of lists of paragraph text. */
    public $successive_element_data = array();

    /** Text accumulated so far for the paragraph that is currently open. */
    private $p_text = '';

    /**
     * Parses $filename and fills $successive_element_data.
     *
     * Terminates the script via die() when the file cannot be opened or the
     * parser reports an error.
     *
     * @param string $filename Path of the XML/XHTML document to scan.
     */
    public function __construct($filename) {
        $parser = xml_parser_create();
        xml_set_element_handler(
            $parser,
            array($this, 'start_element'),
            array($this, 'end_element')
        );
        xml_set_character_data_handler($parser, array($this, 'character_data'));

        $fp = fopen($filename, 'r')
            or die ("Cannot open $filename");

        // Compare against FALSE/'' explicitly: a chunk consisting of "0" is
        // falsy and would otherwise end the loop early.
        $saw_data = FALSE;
        while (($data = fread($fp, 4096)) !== FALSE && $data !== '') {
            $saw_data = TRUE;
            $this->parse_chunk($parser, $data, FALSE);
        }
        fclose($fp);

        // Finalise separately. feof() is still FALSE right after a read that
        // ends exactly at the end of the file, so tying the final flag to it
        // can leave the parser unfinalised and hide truncated documents.
        // An empty file is still accepted silently, as before.
        if ($saw_data) {
            $this->parse_chunk($parser, '', TRUE);
        }

        // The parser is an object from PHP 8.0 on and is released
        // automatically; freeing it explicitly only matters for older versions.
        if (PHP_VERSION_ID < 80000) {
            xml_parser_free($parser);
        }

        // The document may end while a run is still open.
        $this->close_run();
    }

    /**
     * Start-tag callback: <p> extends the current run, anything else ends it.
     *
     * @param mixed  $parser        Parser handle supplied by the XML extension (unused).
     * @param string $element_name  Element name as reported by the parser.
     * @param mixed  $element_attrs Attributes of the element (unused).
     */
    public function start_element($parser, $element_name, $element_attrs) {
        if ($element_name === 'P') {
            // Keep the text of a paragraph that was never closed explicitly.
            $this->flush_paragraph_text();
            $this->p_count += 1;
            $this->in_p = TRUE;
        } else {
            $this->close_run();
        }
    }

    /**
     * End-tag callback: closing a <p> completes its text.
     *
     * Without this, text that follows </p> would still be treated as
     * paragraph content.
     *
     * @param mixed  $parser       Parser handle supplied by the XML extension (unused).
     * @param string $element_name Element name as reported by the parser.
     */
    public function end_element($parser, $element_name) {
        if ($element_name === 'P') {
            $this->flush_paragraph_text();
            $this->in_p = FALSE;
        }
    }

    /**
     * Character-data callback: buffers text that belongs to the open <p>.
     *
     * The parser may deliver one text node in several pieces (for example
     * around entities or at read-chunk boundaries), so the pieces are joined
     * here and stored as a single entry once the paragraph ends.
     *
     * @param mixed  $parser Parser handle supplied by the XML extension (unused).
     * @param string $data   Piece of character data.
     */
    public function character_data($parser, $data) {
        if ($this->in_p) {
            $this->p_text .= $data;
        }
    }

    /** Feeds one chunk to the parser and aborts the script on a parse error. */
    private function parse_chunk($parser, $data, $is_final) {
        xml_parse($parser, $data, $is_final) or
            die(sprintf('XML ERROR: %s at line %d',
                xml_error_string(xml_get_error_code($parser)),
                xml_get_current_line_number($parser)));
    }

    /** Moves the buffered paragraph text into the current run unless it is blank. */
    private function flush_paragraph_text() {
        if (strlen(trim($this->p_text))) {
            $this->p_elements[] = $this->p_text;
        }
        $this->p_text = '';
    }

    /** Records the current run if it is long enough, then resets the run state. */
    private function close_run() {
        $this->flush_paragraph_text();
        if ($this->p_count >= $this->minimum_in_succession) {
            $this->successive_element_data[] = $this->p_elements;
        }
        $this->p_elements = array();
        $this->p_count = 0;
        $this->in_p = FALSE;
    }
}
// Scan the sample document, then dump every recorded run of consecutive
// <p> texts.
$parseState = new StateHelper('example.html');
$recorded_runs = $parseState->successive_element_data;
print_r($recorded_runs);
example.html:
<html>
<head>
</head>
<body>
<p>Foo1</p>
<p>Foo2</p>
<p>Foo3</p>
<div>
<p>Bar1</p>
<p>Bar2</p>
</div>
<ul>
<li>
<p>Baz1</p>
<p>Baz2</p>
<p>Baz3</p>
<p>Baz4</p>
</li>
</ul>
</body>
</html>
输出
Array
(
[0] => Array
(
[0] => Foo1
[1] => Foo2
[2] => Foo3
)
[1] => Array
(
[0] => Bar1
[1] => Bar2
)
[2] => Array
(
[0] => Baz1
[1] => Baz2
[2] => Baz3
[3] => Baz4
)
)