1

我想从这个网站检索一些数据:http://sites.target.com/site/en/spot/search_results.jsp?&mapType=enhanced&startAddress=72756&startingLat=36.322757720947266&startingLong=-93.99922943115234&_requestid=2573952

上面网页的html源代码中有以下几行,

<script>
    function GetMap(){
    var results = new Array();
    results[results.length] = {"lat" : "36.299484", "lon" : "-94.173495", "id" : "2498", "name" : "test", "phoneNumber" : "(479) 986-1100", "hours" : "test", "address" : {"city" : "Rogers", "state" : "AR", "zip" : "72758", "street" : "2404 Promenade Blvd" }, "concepts" : [{"name" : "Pharmacy", "phoneNumber" : "(479) 986-1101", "hours" : "<b>M-Fr:</b> 10:00AM-7:00PM<br><b>Sa:</b> 10:00AM-7:00PM" }, {"name" : "PhotoLab", "hours" : "<b>M-Fr:</b> 9:00AM-9:00PM<br><b>Sa:</b> 9:00AM-9:00PM<br><b>Su:</b> 10:00AM-8:00PM" }, {"name" : "Wine"}, {"name" : "Starbucks"}]};
    results[results.length] = {"lat" : "36.1157", "lon" : "-94.1555", "id" : "1470", "name" : "test", "phoneNumber" : "(479) 443-5517", "hours" : "test", "address" : {"city" : "Fayetteville", "state" : "AR", "zip" : "72703", "street" : "3545 N Shiloh Dr" }, "concepts" : [{"name" : "Pharmacy", "phoneNumber" : "(479) 443-5628", "hours" : "<b>M-Fr:</b> 9:00AM-7:00PM<br><b>Sa:</b> 9:00AM-5:00PM<br><b>Su:</b> 11:00AM-5:00PM" }]};
    results[results.length] = {"lat" : "36.6738", "lon" : "-93.2257", "id" : "2098", "name" : "test", "phoneNumber" : "(417) 243-4500", "hours" : "test", "address" : {"city" : "Branson", "state" : "MO", "zip" : "65616", "street" : "1200 Branson Hills Pkwy" }, "concepts" : [{"name" : "Pharmacy", "phoneNumber" : "(417) 243-4513", "hours" : "<b>M-Fr:</b> 9:00AM-9:00PM<br><b>Sa:</b> 9:00AM-6:00PM<br><b>Su:</b> 9:00AM-6:00PM" }, {"name" : "PhotoLab", "phoneNumber" : "(417) 243-4500", "hours" : "<b>M-Fr:</b> 12:00PM-6:00PM<br><b>Sa:</b> 12:00PM-6:00PM<br><b>Su:</b> 12:00PM-6:00PM" }, {"name" : "Wine"}, {"name" : "Starbucks"}]};
    results[results.length] = {"lat" : "37.0849", "lon" : "-94.474", "id" : "774", "name" : "test", "phoneNumber" : "(417) 659-8755", "hours" : "test", "address" : {"city" : "Joplin", "state" : "MO", "zip" : "64801", "street" : "3151 E 7th St" }, "concepts" : [{"name" : "FreshGrocery"}, {"name" : "Pharmacy", "phoneNumber" : "(417) 206-3377", "hours" : "<b>M-Fr:</b> 9:00AM-9:00PM<br><b>Sa:</b> 9:00AM-6:00PM<br><b>Su:</b> 9:00AM-6:00PM" }, {"name" : "Wine"}, {"name" : "Starbucks"}]};
    results[results.length] = {"lat" : "37.1511", "lon" : "-93.2623", "id" : "1031", "name" : "test", "phoneNumber" : "(417) 889-1511", "hours" : "test", "address" : {"city" : "Springfield", "state" : "MO", "zip" : "65804", "street" : "1825 E Primrose St" }, "concepts" : [{"name" : "FreshGrocery"}, {"name" : "Pharmacy", "phoneNumber" : "(417) 520-1745", "hours" : "<b>M-Fr:</b> 9:00AM-7:00PM<br><b>Sa:</b> 9:00AM-5:00PM<br><b>Su:</b> 11:00AM-5:00PM" }, {"name" : "PhotoLab", "phoneNumber" : "(417) 889-1511", "hours" : "<b>M-Fr:</b> 12:00PM-6:00PM<br><b>Sa:</b> 12:00PM-6:00PM<br><b>Su:</b> 12:00PM-6:00PM" }, {"name" : "Starbucks"}]};

我想分开纬度、经度、id、姓名、电话号码、城市、州、邮政编码。

是否可以从上面的 JavaScript 代码中解析出这些数据?我不知道该如何为这么复杂的行编写正则表达式。

我们可以以以下格式获取 PHP 中的数据吗?

Array
(
    [lat] => 36.299484
    [lon] => -94.173495
    [id] =>  2498
    [name] => Rogers
    [phoneNumber] => (479) 986-1100
    [city] => Rogers
    [state] => AR
    [zipcode] => 72758

)

这是我不完整的方法..

   // Unfinished scraper: download the search-results page, pull the store
   // records out of it with regular expressions, and write one
   // semicolon-separated line per record to file.csv.
   // Every 'Regular Expression Here' below is a placeholder, not a real pattern.

   // Open the output file for writing ("w" truncates any existing file).
   $fp = fopen("file.csv","w");
    // Fetch the whole page as a single string.
    $contents = file_get_contents('http://sites.target.com/site/en/spot/search_results.jsp?&mapType=enhanced&startAddress=72756&startingLat=36.322757720947266&startingLong=-93.99922943115234&_requestid=2573952');
    // First pass: capture group 1 is iterated below (presumably one entry per store record).
    preg_match_all('Regular Expression Here', $contents, $matches);        
    foreach ($matches[1] as $index) {       
        // NOTE(review): $index is never used; this searches the full $contents
        // again on every iteration and overwrites $matches. Presumably the
        // per-record text in $index was meant to be the subject — confirm.
        preg_match('Regular Expression Here', $contents, $matches);
        // Second pass over capture group 1 of the previous match; $matches is
        // overwritten again. Group 1 is used as the key and group 2 as the value below.
        preg_match_all('Regular Expression Here', $matches [1], $matches);
        $c = count ($matches [1]);
        $results = array();
        for ($i=0; $i<$c; $i++)  {
            // "\'" in a double-quoted PHP string is a backslash plus a single
            // quote, so both characters are stripped from the ends of the value.
            $results [$matches [1] [$i]] = trim($matches [2] [$i], "\'");
        }
        // Emit the record's values joined by ";" and terminated with CRLF.
        fwrite($fp,implode(";",array_values($results))."\r\n");
    }
    fclose($fp);
4

2 回答 2

1

信息采用称为 JSON 的格式。PHP 有一个 JSON 解析器。

http://php.net/manual/en/book.json.php

于 2012-06-06T22:16:24.193 回答
1

如果格式像您的示例中那样固定,我不会打扰正则表达式。

我会做的是:

  1. 使用 file 而不是 file_get_contents 将内容读入数组(文件的每一行对应一个数组元素);
  2. 循环遍历数组;
  3. 使用某个字符串函数检查该行是否包含 results[results.length] =,并删除它之前的所有内容,包括 results[results.length] = 本身;
  4. 使用 trim 去除开头和结尾的空格以及行尾的分号;
  5. 使用 json_decode 解码剩余的 JSON 对象,请参见此示例
于 2012-06-06T22:32:57.963 回答