0

我有一个函数 Titles,我想用 array_walk 对数组中每个 [link] 的值调用它,取得该链接对应页面的标题,并把标题放入同一数组元素的 [title] 中。

例如 Qlick => [link] => "http://www.theqlick.com" [title] => Qlick

    // Absolute URLs of the anchors found on $URL; filled by the loop below.
    $links = Array();

$URL = 'http://www.theqlick.com'; // start page to scan; change it for urls to grab  

// Load the start page and collect the href of every <a> element.
// NOTE(review): file_get_html() and url_to_absolute() are not defined in this
// snippet - presumably they come from an HTML-DOM helper library; confirm.
$file  = file_get_html($URL);
foreach ($file->find('a') as $theelement) 
{
    // Resolve the (possibly relative) href against the start page URL.
    $abs_url = url_to_absolute($URL, $theelement->href);
    // Skip anchors for which no URL could be produced.
    if (!empty($abs_url))
        $links[] = $abs_url;
}

  /**
   * Downloads every URL in the global $links array, concatenates the pages
   * into one string and runs a <title> regex over the combined text.
   *
   * NOTE(review): as written this does not return the titles - see the
   * comments on the individual lines below.
   */
  function Titles() {
  global $links;
  // Fetch ALL pages and glue them together; this happens on every call.
  $str = implode('',array_map('file_get_contents',$links));
  // Only takes effect after the downloads above have already run.
  error_reporting(E_ERROR | E_PARSE);

  $titles = Array();
    if( strlen( $str )>0 ) {
  // preg_match_all() writes the matches into $titles (third argument) and
  // returns the number of matches, which is then appended to $titles.
  $titles[] = preg_match_all( "/\<title\>(.*)\<\/title\>/", $str, $titles );
  // $title is never assigned in this function (only $titles is).
  return $title;   
  // Unreachable: the function has already returned on the line above.
  return $links;
  } }


  // Result list: one array('link' => ..., 'title' => ...) entry per link.
  $newArray = array();


  // array_walk() calls Titles once per element of $links and returns a
  // boolean, not the values returned by the callback, so $title is not a
  // list of titles (the print_r output shows it as 1 for every entry).
  $title = array_walk($links, 'Titles');
  foreach($links as $key => $val ){
// Every entry receives the same $title value.
$newArray[$key] = array( 'link' => $val, 'title' => $title);
 }
 print_r($newArray);

My result when var_dump is used:
  array(2) {



 [0]=>
  array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
      }



  [1]=>
  array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }

    }
   array(2) {
   [0]=>
     array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
       }


  [1]=>
 array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }
    }


  array(2) {
 [0]=>
 array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
     }
  [1]=>
  array(6) {
   [0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
     }
     }

    array(2) {
   [0]=>
   array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
      }
  [1]=>
   array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
     }
     }
    array(2) {
   [0]=>
   array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
     }
   [1]=>
   array(6) {
  [0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }
    }
  array(2) {
 [0]=>
 array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
    }

  [1]=>
  array(6) {
   [0]=>
   string(11) " The Qlick "
   [1]=>

 string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }
    }


   Array
   ( 
[0] => Array
    (
        [link] => http://www.theqlick.com/index.php
        [title] => 1
    )

[1] => Array
    (
        [link] => http://www.theqlick.com/qlickdates.php
        [title] => 1
    )

[2] => Array
    (
        [link] => http://www.theqlick.com/festivalfreaks.html
        [title] => 1
    )

[3] => Array
    (
        [link] => http://www.theqlick.com/2kcm.php
        [title] => 1
    )

[4] => Array
    (
        [link] => http://www.theqlick.com/index3.php
        [title] => 1
    )

[5] => Array
    (
        [link] => http://www.theqlick.com/index2.php
        [title] => 1
    )

       )
4

1 回答 1

0

将函数的结尾更改为:

// Replacement for the end of Titles(): return the first <title> text found
// in $str, or an empty string when there is nothing to search or no match.
if( strlen( $str )>0 ) {
    $titles = Array();
    // preg_match_all() returns the number of matches; the matches themselves
    // are written into $titles: [0] = full matches, [1] = captured title text.
    preg_match_all( "/\<title\>(.*)\<\/title\>/", $str, $titles );
    // $titles always has both keys, even with zero matches, so test the
    // capture list itself and hand back its first element (a string).
    if (!empty($titles[1])) {
        return $titles[1][0];
    }
}

return '';

preg_match_all 的返回值是匹配的数量,匹配到的内容则写入第三个参数 $titles。这将返回您找到的第一个匹配项,如果没有要搜索的标题或文本,则返回一个空字符串。

我没有测试过这个,所以它可能需要调试。

编辑添加:

// Absolute URLs of the links found on $URL; filled by the foreach loop further down.
$links = Array();  
$URL = 'http://www.theqlick.com'; // start page to scan; change it for urls to grab    
// The links are grabbed from $URL after Titles() has been defined.

/**
 * Fetch a single page and return the text of its <title> element.
 *
 * @param string $link URL (or any path file_get_contents() can read) of the page.
 * @return string The title text with surrounding whitespace removed, or ''
 *                when the page cannot be read or has no <title> element.
 */
function Titles($link) {
    $str = file_get_contents($link);
    // file_get_contents() returns false on failure; an empty page has no title either.
    if ($str === false || $str === '') {
        return '';
    }

    // preg_match() stops at the first match, which is the page's own title.
    //   \b[^>]*  tolerates attributes on the tag without matching e.g. <titlebar>
    //   .*?      non-greedy, so it ends at the first closing tag
    //   i / s    tags may be upper-case and the title may span several lines
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $str, $match) === 1) {
        return trim($match[1]);
    }

    return '';
}

// Load the start page and collect the absolute URL of every anchor on it.
$file  = file_get_html($URL);
// file_get_html() can return false when the page cannot be loaded; calling
// ->find() on that would be a fatal error, so only walk a real DOM object.
if (is_object($file)) {
    foreach ($file->find('a') as $theelement) {
        $abs_url = url_to_absolute($URL, $theelement->href);
        if (!empty($abs_url)) {
            $links[] = $abs_url;
        }
    }
}

$output = Array();

// Handle one link at a time so each title stays paired with its own URL.
foreach ($links as $thisLink) {
    $output[] = array("link" => $thisLink, "title" => Titles($thisLink));
}

(再说一次,这段代码完全没有经过测试)

您先生成链接列表;然后逐个遍历该列表,并为每个链接获取页面标题。这样一次只处理一个链接,更容易跟踪哪个标题对应哪个链接。

于 2012-09-21T21:09:22.617 回答