1

我想提取用特定关键字进行 Google 搜索时返回的结果链接。我知道已经有现成的 API,但我想自己编写脚本。目前我已经可以提取到链接,但我的正则表达式写得不太好,得到的链接是下面这种形式:

#  /url?q=http://wikipedia.org/wiki/World_Wide_Web&sa=U&ei=naEHUbDoLITQ0QXWwIGoCw&ved=0CB8QFjAA&usg=AFQjCNEn84GVN2VHpfEVjnMm5FG_p8YjgQ

我不知道怎样才能只保留真实的网址。如果您有什么想法,请告诉我。谢谢!

// Fetch the first Google result page for $key and collect the real
// destination URLs of the result links into $links.
$key = "test";
$links = array(); // destination URLs in page order, without duplicates
$u = 0;           // number of URLs collected (previously incremented without being initialised)
if ($kw = $key) { // deliberate assignment: copies $key into $kw and skips an empty keyword
    $pagenum = 0;
    $googlefrurl = "http://www.google.com/search?q=" . urlencode($kw) . "&start=$pagenum";

    while ($pagenum <= 0) { // the bound of 0 means only one page is fetched
        $result = file_get_contents($googlefrurl);

        // Build the next page URL before any early exit so the loop always advances.
        $pagenum++;
        $pagenum2 = $pagenum . '0'; // page 1 -> start=10
        $googlefrurl = "http://www.google.com/search?q=" . urlencode($kw) .
            "&start=$pagenum2&safe=off&pwst=1&filter=0";

        if ($result === false) {
            continue; // download failed; there is nothing to parse for this page
        }

        // Result links are redirects of the form /url?q=<real url>&sa=...
        // Explicit "~" delimiters are used: in the old pattern '<a href="(.*?)">si'
        // the "<" and ">" were silently consumed as PCRE delimiters.
        preg_match_all('~<a\s[^>]*href="(/url\?[^"]*)"~si', $result, $matches);
        $n = count($matches[1]);

        // Iterate with foreach; the previous while($i <= $n) loop never
        // changed $i and therefore never terminated.
        foreach ($matches[1] as $href) {
            // Attribute values are HTML-escaped (&amp;), so decode before parsing.
            $query = parse_url(html_entity_decode($href), PHP_URL_QUERY);
            if (!is_string($query)) {
                continue;
            }

            parse_str($query, $params); // parse_str also URL-decodes the values
            if (empty($params['q']) || !is_string($params['q'])) {
                continue;
            }

            // Keep only absolute http(s) targets; skip Google-internal links.
            if (!preg_match('~^https?://~i', $params['q'])) {
                continue;
            }

            if (!in_array($params['q'], $links, true)) {
                $links[] = $params['q'];
                $u++;
            }
        }
    }
}
4

1 回答 1

0

您需要先获取 Google API 密钥,然后使用自定义搜索(Custom Search)REST API:https://developers.google.com/custom-search/v1/using_rest

它会返回一个 JSON,其中包含搜索结果以及您需要的所有信息。例如,发送如下请求(直接从文档中复制):

GET https://www.googleapis.com/customsearch/v1?
key=INSERT-YOUR-KEY&cx=013036536707430787589:_pqjad5hr1a&q=flowers&alt=json

返回

200 OK

{
 "kind": "customsearch#search",
 "url": {
  "type": "application/json",
  "template": "https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={safe?}&cx={cx?}&cref={cref?}&sort={sort?}&filter={filter?}&gl={gl?}&cr={cr?}&googlehost={googleHost?}&c2coff={disableCnTwTranslation?}&hq={hq?}&hl={hl?}&nsc={nsc?}&siteSearch={siteSearch?}&siteSearchFilter={siteSearchFilter?}&exactTerms={exactTerms?}&excludeTerms={excludeTerms?}&linkSite={linkSite?}&orTerms={orTerms?}&relatedSite={relatedSite?}&dateRestrict={dateRestrict?}&lowRange={lowRange?}&highRange={highRange?}&searchType={searchType}&fileType={fileType?}&rights={rights?}&imgSize={imgSize?}&imgType={imgType?}&imgColorType={imgColorType?}&imgDominantColor={imgDominantColor?}&alt=json"
 },
 "queries": {
  "nextPage": [
   {
    "title": "Google Custom Search - flowers",
    "totalResults": 10300000,
    "searchTerms": "flowers",
    "count": 10,
    "startIndex": 11,
    "inputEncoding": "utf8",
    "outputEncoding": "utf8",
    "cx": "013036536707430787589:_pqjad5hr1a"
   }
  ],
  "request": [
   {
    "title": "Google Custom Search - flowers",
    "totalResults": 10300000,
    "searchTerms": "flowers",
    "count": 10,
    "startIndex": 1,
    "inputEncoding": "utf8",
    "outputEncoding": "utf8",
    "cx": "013036536707430787589:_pqjad5hr1a"
   }
  ]
 },
 "context": {
  "title": "Custom Search"
 },
 "items": [
 {
   "kind": "customsearch#result",
   "title": "Flower - Wikipedia, the free encyclopedia",
   "htmlTitle": "\u003cb\u003eFlower\u003c/b\u003e - Wikipedia, the free encyclopedia",
   "link": "http://en.wikipedia.org/wiki/Flower",
   "displayLink": "en.wikipedia.org",
   "snippet": "A flower, sometimes known as a bloom or blossom, is the reproductive structure found in flowering plants (plants of the division Magnoliophyta, ...",
   "htmlSnippet": "A \u003cb\u003eflower\u003c/b\u003e, sometimes known as a bloom or blossom, is the reproductive structure \u003cbr\u003e  found in flowering plants (plants of the division Magnoliophyta, \u003cb\u003e... \u003c/b\u003e",
   "pagemap": {
    "RTO": [
     {
      "format": "image",
      "group_impression_tag": "prbx_kr_rto_term_enc",
      "Opt::max_rank_top": "0",
      "Opt::threshold_override": "3",
      "Opt::disallow_same_domain": "1",
      "Output::title": "\u003cb\u003eFlower\u003c/b\u003e",
      "Output::want_title_on_right": "true",
      "Output::num_lines1": "3",
      "Output::text1": "꽃은 식물 에서 씨 를 만들어 번식 기능을 수행하는 생식 기관 을 말한다. 꽃을 형태학적으로 관찰하여 최초로 총괄한 사람은 식물계를 24강으로 분류한 린네 였다. 그 후 꽃은 식물분류학상중요한 기준이 되었다.",
      "Output::gray1b": "- 위키백과",
      "Output::no_clip1b": "true",
      "UrlOutput::url2": "http://en.wikipedia.org/wiki/Flower",
      "Output::link2": "위키백과 (영문)",
      "Output::text2b": "   ",
      "UrlOutput::url2c": "http://ko.wikipedia.org/wiki/꽃",
      "Output::link2c": "위키백과",
      "result_group_header": "백과사전",
      "Output::image_url": "http://www.gstatic.com/richsnippets/b/fcb6ee50e488743f.jpg",
      "image_size": "80x80",
      "Output::inline_image_width": "80",
      "Output::inline_image_height": "80",
      "Output::image_border": "1"
     }
    ]
   }
  },
  ...
 ]
}
于 2013-01-29T13:25:44.050 回答