1

我有来自我公司网站的这个 HTML 代码。由于我无权访问数据库,因此我想通过 HTML 文件解析并返回值。代码是这样的:

<?php
// Sample report markup to be parsed: five sections separated by <hr>, each made of a
// <p> title followed by a table. The literal is single-quoted, so double quotes are
// written as-is; a backslash before them would end up verbatim inside the HTML.
$string = '
<p> <b>HEADER INFO</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>View Object:</b> 6600422</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>BPO:</b> G37147359-000000</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Ack Date:</b> 2012-05-28</font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=3><font face="verdana, arial, helvetica" size="-2"><b>Operation(s):</b> PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End</font></td>
  </tr>
</table>
</p>
<hr>
<p> <b>EXTERNAL ORDER NUMBER REFERENCE</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>SAP Sales Order Number</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Customer P.O. Number</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Legacy Order Number</b></font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">0310363858</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">77340892008-120413</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">89FF09378001</font></td>
  </tr>
</table>
</p>
<hr>
<p> <b>PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>PL</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Product #</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Qty</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Options</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Serial #</b></font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">3C</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">AP703B</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">1</font></td>
    <td valign=top colspan=1>&nbsp </td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">2S6219000G</font></td>
  </tr>
</table>
</p>
<hr>
<p> <b>Station Info</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Start Station:</b> JPN_End</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Location:</b> Done</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Station:</b> </font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Birth Date/Time:</b> 2012-05-23 14:20:32 SGT</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Power Cord:</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Voltage:</b></font></td>
  </tr>
</table>
</p>
<hr>
<p> <b>MATERIAL LIST FOR THIS WORK OBJECT/OPERATION(S)</b>
<table width=100% cellspacing=0>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Part Number</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Qty</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Description</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>BB Type</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Material Location</b></font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Serial Number</b></font></td>
  </tr>
  <tr align=left>
    <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">AP703B@@</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">1</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">OEM Generic 1U SAS Enclosure</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">BOM</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">ASSY</font></td>
    <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">2S6219000G</font></td>
  </tr>
</table>
</p>
 ';

 $result = parse_data($string); // associative array of the parsed fields

// Promote every key of the parsed result to a same-named variable in this scope.
extract($result);

// Emit the six fields in order, each one followed by an HTML line break.
echo implode('<br />', array($headertext, $sapSON, $custPON, $legacyON, $pl, $pn)), '<br />';


/**
 * Pulls a header and the first five digit-leading table cells out of an HTML fragment.
 *
 * The header is the trimmed text of the last <p> element found. Cell values are taken,
 * in document order, from every <td> whose trimmed text starts with a digit; they are
 * mapped positionally to sapSON, custPON, legacyON, pl and pn.
 *
 * @param string $string HTML markup to parse.
 * @return array Keys: headertext, sapSON, custPON, legacyON, pl, pn. A key is null
 *               when the markup did not supply a value for it.
 */
function parse_data($string){
    // Spacer cells contain only a pair of &nbsp; entities; removing them leaves those cells empty.
    $string = str_replace('&nbsp;&nbsp;','',$string);

    $header = null;
    $values = array();

    // DOMDocument::loadHTML() rejects an empty source, so only parse when there is markup.
    if(trim($string) !== ''){
        // Collect libxml parse warnings internally instead of silencing them with "@".
        $previous = libxml_use_internal_errors(true);
        $xml = new DOMDocument();
        $xml->loadHTML($string);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        // Every <p> overwrites the previous one, so the last <p> wins.
        foreach($xml->getElementsByTagName('p') as $p) {
            $header = trim($p->nodeValue);
        }

        foreach($xml->getElementsByTagName('td') as $td) {
            $value = trim($td->nodeValue);
            // Strict emptiness test: empty() would also discard a legitimate "0" cell.
            if($value !== '' && is_numeric($value[0])){
                $values[] = $value;
            }
        }
    }

    $ret = array('headertext'=>$header);
    foreach(array('sapSON','custPON','legacyON','pl','pn') as $index=>$name){
        // Missing positions become null rather than raising an undefined-offset error.
        $ret[$name] = isset($values[$index]) ? $values[$index] : null;
    }

    return $ret;
}
?>

现在我想把标题 "EXTERNAL ORDER NUMBER REFERENCE" 保存到一个变量中,以便稍后调用。

此外,第一行的第二、第三和第四列分别对应于第二行的第二、第三和第四列的值。我还想将这些值保存到变量中。所以基本上,我需要一个 PHP 脚本来解析这个 HTML 文件并返回以下内容:

$header1 = "HEADER INFO";
$viewObject = "6600422";
$BPO = "G37147359-000000";
$AckDate = "2012-05-28";
$Operations = "PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End";
$header2 = "EXTERNAL ORDER NUMBER REFERENCE";
$sapSON = "0310363858";
$custPON = "77340892008-120413";
$legacyON = "89FF09378001";
$header3 = "PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)";
$pl = "3C";
$pn = "AP703B";
$qty = "1";
$options = "&nbsp;";
$serialNo = "2S6219000G";

等等……基本上,我需要把所有表格内容都保存到变量中,因为稍后我会把它们存入自己的数据库,据此生成报告,并为其中一些明细生成条形码。

谢谢您的帮助!

仅供参考:我无权访问数据库,所以我所能做的就是通过这个 HTML 文件进行解析并将值保存到变量中,以便以后存储到我的数据库中。另外,请注意标题是恒定的,唯一变化的值是用于不同订单的数字。

4

1 回答 1

2

试试下面这段代码(可以在线查看它的运行效果):

<?php
// Sample markup for a single report section: a <p> title followed by a table whose
// first row holds three column labels and whose second row holds the matching values.
$string = '<p> <b>EXTERNAL ORDER NUMBER REFERENCE</b>
    <table width=100% cellspacing=0>
      <tr align=left>
        <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>SAP Sales Order Number</b></font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Customer P.O. Number</b></font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Legacy Order Number</b></font></td>
      </tr>
      <tr align=left>
        <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">0310363858</font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">77340892008-120413</font></td>
        <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">89FF09378001</font></td>
  </tr>
    </table>
</p>
';

// Parse the section markup into an associative array of named fields.
$result = parse_data($string);

// Promote every key of the parsed result to a same-named variable in this scope.
extract($result);

// Emit the four fields in order, each one followed by an HTML line break.
echo implode('<br />', array($headertext, $sapSON, $custPON, $legacyON)), '<br />';


/**
 * Pulls a header and the first three digit-leading table cells out of an HTML fragment.
 *
 * The header is the trimmed text of the last <p> element found. Cell values are taken,
 * in document order, from every <td> whose trimmed text starts with a digit; they are
 * mapped positionally to sapSON, custPON and legacyON.
 *
 * @param string $string HTML markup to parse.
 * @return array Keys: headertext, sapSON, custPON, legacyON. A key is null when the
 *               markup did not supply a value for it.
 */
function parse_data($string){
    // Spacer cells contain only a pair of &nbsp; entities; removing them leaves those cells empty.
    $string = str_replace('&nbsp;&nbsp;','',$string);

    $header = null;
    $values = array();

    // DOMDocument::loadHTML() rejects an empty source, so only parse when there is markup.
    if(trim($string) !== ''){
        // Collect libxml parse warnings internally instead of silencing them with "@".
        $previous = libxml_use_internal_errors(true);
        $xml = new DOMDocument();
        $xml->loadHTML($string);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        // Every <p> overwrites the previous one, so the last <p> wins.
        foreach($xml->getElementsByTagName('p') as $p) {
            $header = trim($p->nodeValue);
        }

        foreach($xml->getElementsByTagName('td') as $td) {
            $value = trim($td->nodeValue);
            // Strict emptiness test: empty() would also discard a legitimate "0" cell.
            if($value !== '' && is_numeric($value[0])){
                $values[] = $value;
            }
        }
    }

    $ret = array('headertext'=>$header);
    foreach(array('sapSON','custPON','legacyON') as $index=>$name){
        // Missing positions become null rather than raising an undefined-offset error.
        $ret[$name] = isset($values[$index]) ? $values[$index] : null;
    }

    return $ret;
}
?>

编辑版本 2(多行):

由于每个表格的结构各不相同,处理起来相当复杂,不过我喜欢挑战。代码如下,希望对你有帮助……

<?php
// Parse the report markup; the result is iterated below as a list of
// per-section associative arrays (field name => value).
$result = parse_data($string);

//Create Variables From Values
// Each inner key becomes a variable of that name in the current scope via a
// variable-variable assignment. If two sections share a key, the later section's
// value overwrites the earlier one.
foreach($result as $key=>$value){
    foreach($value as $key_b=>$value_b){
        $$key_b = $value_b;
    }
}
/* --New Available Variables--
    $header0 = HEADER INFO
    $ViewObject = 6600422
    $BPO = G37147359-000000
    $AckDate = 2012-05-28
    $Operations = PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End
    $header1 = EXTERNAL ORDER NUMBER REFERENCE
    $SAPSalesOrderNumber = 0310363858
    $CustomerPONumber = 77340892008-120413
    $LegacyOrderNumber = 89FF09378001
    $header2 = PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)
    $PL = 3C
    $Product = AP703B
    $Qty = 1
    $Options =  
    $Serial = 2S6219000G
    $header3 = Station Info
    $StartStation = JPN_End
    $Location = Done
    $Station = 
    $BirthDateTime = 2012-05-23 14
    $PowerCord = 
    $Voltage = 
    $header4 = MATERIAL LIST FOR THIS WORK OBJECT/OPERATION(S)
    $PartNumber = AP703B@@
    $Description = OEM Generic 1U SAS Enclosure
    $BBType = BOM
    $MaterialLocation = ASSY
    $SerialNumber = 2S6219000G
*/

/**
 * Splits report markup on <hr> and parses every section's title and table cells.
 *
 * Sections are interpreted by their position in the document:
 *   - 0 and 3: every non-empty cell has the form "Label: value";
 *   - 1:       label cells, followed by value cells that start with a digit, paired
 *              with the label three cells earlier;
 *   - 2 and 4: 5 (resp. 6) label cells followed by as many value cells.
 * Any other section only contributes its title. Labels are reduced to their ASCII
 * letters to form the array key (e.g. "Serial #" becomes "Serial").
 *
 * @param string $string Full report markup, sections separated by "<hr>".
 * @return array List indexed by section position; each entry maps 'header<N>' to the
 *               section title and each derived label key to its trimmed value. A value
 *               with no matching label is kept under its numeric cell position.
 */
function parse_data($string){
    // Spacer cells contain only a pair of &nbsp; entities; removing them leaves those cells empty.
    $string = str_replace('&nbsp;&nbsp;','',$string);
    $parts = explode('<hr>',$string);

    // Sections using "Label: value" cells.
    $inlineSections = array(0, 3);
    // Tabular sections: section index => distance between a label cell and its value cell.
    $labelOffsets = array(1=>3, 2=>5, 4=>6);

    // Collect libxml parse warnings internally instead of silencing them with "@".
    $previous = libxml_use_internal_errors(true);

    $html = new DOMDocument();
    $ret = array();
    $entry=0;
    foreach($parts as $part){
        // DOMDocument::loadHTML() rejects an empty source; skip it but keep the numbering.
        if(trim($part) === ''){
            $entry++;
            continue;
        }
        $html->loadHTML($part);
        libxml_clear_errors();

        //Get Header
        foreach($html->getElementsByTagName('p') as $p) {
            $ret[$entry]['header'.$entry] = trim($p->nodeValue);
        }

        // $i counts non-empty cells only; empty spacer cells must not shift the pairing.
        $i=0;
        foreach($html->getElementsByTagName('td') as $td){
            $value = trim($td->nodeValue);
            // Strict emptiness test: empty() would also discard a legitimate "0" cell.
            if($value === ''){
                continue;
            }

            if(in_array($entry, $inlineSections, true)){
                // Split on the first colon only so values such as "14:20:32" stay intact.
                $split = explode(':',$value,2);
                $key = preg_replace('/[^a-zA-Z]/s', '', $split[0]);
                $ret[$entry][$key] = isset($split[1]) ? trim($split[1]) : '';
            }elseif(isset($labelOffsets[$entry])){
                $offset = $labelOffsets[$entry];
                // Section 1 tells labels from values by the leading digit; the others by position.
                $isLabel = ($entry === 1) ? !is_numeric($value[0]) : ($i < $offset);

                if($isLabel){
                    // Park the label under its cell position until its value shows up.
                    $ret[$entry][$i] = $value;
                }else{
                    $labelIndex = $i - $offset;
                    if(isset($ret[$entry][$labelIndex])){
                        $key = preg_replace('/[^a-zA-Z]/s', '', $ret[$entry][$labelIndex]);
                        unset($ret[$entry][$labelIndex]);
                    }else{
                        // No parked label to pair with: keep the value under its position.
                        $key = $i;
                    }
                    $ret[$entry][$key] = $value;
                }
            }
            $i++;
        }
        $entry++;
    }

    libxml_use_internal_errors($previous);
    return $ret;
}
?>
于 2012-05-28T02:56:38.620 回答