@user1518659 试试下面这段代码。要解决这个问题,只需在把 HTML 传给 DOMDocument 之前,先将 `&nbsp;`(不换行空格)替换为普通空格。
我还添加了把全名拆分为名(first name)和姓(last name)的处理 :) 希望对你有所帮助。
<?php
header('Content-Type: text/html; charset=utf-8'); //Required if you're outputting, as the description contains utf-8 characters
//Load the source (input)
$html_source = file_get_contents('http://www.reuters.com/finance/stocks/companyOfficers?symbol=AOS');
if ($html_source === false || $html_source === '') {
    // Without this guard the failure only surfaces later as an empty result
    // (or as an error from loadHTML() on an empty string).
    throw new RuntimeException('Unable to download the company officers page.');
}
// Turn non-breaking spaces into plain spaces BEFORE parsing. Otherwise the parser
// decodes the entity to U+00A0 and the later explode(' ', ...) on the full name
// never finds a separator. Both entity spellings and the raw UTF-8 byte sequence
// are covered.
$html_source = str_replace(array('&nbsp;', '&#160;', "\xC2\xA0"), ' ', $html_source);
//Dom document
$dom = new DOMDocument('1.0');
// Collect markup warnings internally instead of silencing everything with "@",
// then restore the previous libxml error mode.
$libxml_previous = libxml_use_internal_errors(true);
$dom->loadHTML($html_source);
libxml_clear_errors();
libxml_use_internal_errors($libxml_previous);
// Walk every <table class="dataTable"> and create one $out entry per row that has
// at least one <td>.
//  - Rows with four or more cells supply age / since / position (cells 1..3).
//  - Rows with fewer than three cells are treated as "name + description" rows:
//    the text of cell 1 is attached to every entry collected so far whose full
//    name matches (this includes the row's own placeholder entry, which never
//    gets an age and can therefore be told apart later).
$out = array();
$i = 0;
foreach ($dom->getElementsByTagName('table') as $table) {
    if ($table->getAttribute('class') !== 'dataTable') {
        continue;
    }
    foreach ($table->getElementsByTagName('tr') as $tr) {
        // Query the cells once per row instead of once per access.
        $cells = $tr->getElementsByTagName('td');
        if ($cells->length === 0) {
            // No <td> at all (presumably a header row) - nothing to record.
            continue;
        }

        $fullname = $cells->item(0)->nodeValue;
        $name = explode(' ', $fullname);
        $out[$i] = array(
            'fullname'   => $fullname,
            'first_name' => $name[0],
            // A single-word name has no second token; use '' instead of
            // reading an undefined offset.
            'last_name'  => isset($name[1]) ? $name[1] : '',
        );

        if ($cells->length < 3) {
            // A one-cell row has no description cell to read.
            if ($cells->length === 2) {
                $description = $cells->item(1)->nodeValue;
                // Ignore cells that start with a digit or hold the "--" placeholder;
                // those are not description text.
                if (!is_numeric(substr($description, 0, 1)) && $description !== '--') {
                    foreach (array_keys($out) as $key) {
                        if ($out[$key]['fullname'] === $fullname) {
                            $out[$key]['description'] = $description;
                        }
                    }
                }
            }
        } elseif ($cells->length > 3) {
            $out[$i]['age'] = $cells->item(1)->nodeValue;
            $out[$i]['since'] = $cells->item(2)->nodeValue;
            $out[$i]['position'] = $cells->item(3)->nodeValue;
        }
        $i++;
    }
}
// Keep only the complete records: an entry survives when all five fields were
// filled in. array_filter() preserves the original $out keys.
$return = array_filter($out, function ($row) {
    return isset(
        $row['fullname'],
        $row['age'],
        $row['since'],
        $row['position'],
        $row['description']
    );
});
print_r($return);
/*
Array
(
[0] => Array
(
[fullname] => Paul Jones
[first_name] => Paul
[last_name] => Jones
[age] => 63
[since] => 2011
[position] => Chairman of the Board, Chief Executive Officer
[description] => Mr. Paul W. Jones serves as the Chairman of the Board, Chief Executive Officer of A. O. Smith Corp. He has been a director of company since 2004. He is a member of the Investment Policy Committee of the Board. He was elected chairman of the board, president and chief executive officer effective December 31, 2005. He was president and chief operating officer from 2004 to 2005. Prior to joining the company, he was chairman and chief executive officer of U.S. Can Company, Inc. from 1998 to 2002. He previously was president and chief executive officer of Greenfield Industries, Inc. from 1993 to 1998 and president from 1989 to 1992. Mr. Jones has been a director of Federal Signal Corporation since 1998, where he chairs the Nominating and Governance Committee and is a member of the Compensation and Benefits Committee and the Executive Committee, and Integrys Energy Group, Inc. since 2011, where he is a member of the Compensation and Financial Committees. He was also a director of Bucyrus International, Inc. from 2006 until its acquisition by Caterpillar, Inc. in 2011, and chaired the Compensation Committee.
)
[1] => Array
(
[fullname] => Ajita Rajendra
[first_name] => Ajita
[last_name] => Rajendra
[age] => 60
[since] => 2011
[position] => President, Chief Operating Officer, Director
[description] => Mr. Ajita G. Rajendra serves as the President, Chief Operating Officer and Director of A. O. Smith Corp. He was elected a director of company in December 2011, based on the recommendation of the Nominating and Governance Committee, following his election as President and Chief Operating Officer in September 2011. Mr. Rajendra joined the company as President of A. O. Smith Water Products Company in 2005, and was named Executive Vice President of the company in 2006. Prior to joining the company, Mr. Rajendra was Senior Vice President at Kennametal, Inc., a manufacturer of cutting tools, from 1998 to 2004. Mr. Rajendra also serves on the board of Donaldson Company, Inc., where he is a member of the Audit Committee and Human Resources Committee. Further, Mr. Rajendra was a director of Industrial Distribution Group, Inc. from 2007 until its acquisition by Eiger Holdco, LLC in 2008.
)
...
...
*/
?>