下面的脚本从 Google 图片搜索抓取图片,但每次只能取到 $page 变量所指定页面上的 20 张图片。
我不明白为什么结果恰好是 20 条,也不知道如何把这个数量调大,例如一次获取前 100 张图片。
// Values accepted by the `imgsz` query parameter (requested image size).
const GIS_LARGE = 'l';
const GIS_MEDIUM = 'm';
const GIS_ICON = 'i';
// Empty string: no restriction. Also used as the default for the image type.
const GIS_ANY = '';

// Values accepted by the `imgtype` query parameter (kind of image).
const GIS_FACE = 'face';
const GIS_PHOTO = 'photo';
const GIS_CLIPART = 'clipart';
const GIS_LINEART = 'lineart';
/**
 * Fetch the body of a URL with cURL.
 *
 * @param string $url Address to request.
 *
 * @return string|false The response body, or false when the transfer fails.
 */
function get_data($url)
{
    $ch = curl_init();
    $timeout = 5;

    curl_setopt_array($ch, array(
        CURLOPT_URL            => $url,
        // Without this option curl_exec() echoes the body and returns true,
        // so the caller would never receive the page content.
        CURLOPT_RETURNTRANSFER => true,
        // Give up if the connection cannot be established within $timeout seconds.
        CURLOPT_CONNECTTIMEOUT => $timeout,
    ));

    $data = curl_exec($ch);

    return $data;
}
/**
 * Scrape one page of Google Images results for a query.
 *
 * The function requests a single results page, locates the
 * `<table class="images_table">` element in the returned HTML and turns each
 * `<td>` cell into an object describing one image.
 *
 * Nothing in this function sets how many images come back: the count is simply
 * the number of `<td>` cells in the one page that was fetched. The only paging
 * control sent to the server is the `start` offset derived from $page, so
 * collecting more images means calling this function for several pages and
 * merging the returned arrays.
 *
 * @param string $query Search terms.
 * @param int    $page  1-based page number; translated into the `start` offset.
 * @param string $size  Value for `imgsz` (see the GIS_LARGE/GIS_MEDIUM/GIS_ICON/GIS_ANY constants).
 * @param string $type  Value for `imgtype` (see the GIS_FACE/GIS_PHOTO/GIS_CLIPART/GIS_LINEART/GIS_ANY constants).
 *
 * @return array List of stdClass objects, one per table cell. Each may carry
 *               resultLink, source, title, width, height, type, size, domain
 *               and a nested `thumb` object with src, width and height.
 *               Properties whose pattern did not match are left unset.
 *               Empty when the request failed or no results table was found.
 */
function googleImageSearch ($query, $page = 1, $size = GIS_ANY, $type = GIS_ANY)
{
    $retVal = array();

    // Get the search results page.
    // NOTE(review): the offset advances by 21 per page; if the server returns
    // 20 results per page this skips one result between pages. Confirm.
    $response = get_data(
        'http://images.google.com/images?hl=en&q=' . urlencode($query)
        . '&imgsz=' . urlencode($size)
        . '&imgtype=' . urlencode($type)
        . '&start=' . (($page - 1) * 21)
    );

    // get_data() yields false when the transfer fails; there is nothing to parse.
    if (!is_string($response)) {
        return $retVal;
    }

    // The results live inside <table class="images_table">...</table>.
    if (!preg_match('/\<table class=\"images_table\"(.*?)\>(.*?)\<\/table\>/is', $response, $match)) {
        return $retVal;
    }

    // Every table cell holds one image result.
    preg_match_all('/\<td(.*?)\>(.*?)\<\/td\>/', $match[2], $cells);

    // Property name => expression whose first capture group holds its value.
    $info = array(
        'resultLink' => '\<a href=\"(.*?)\"',
        'source' => 'imgurl=(.*?)&',
        'title' => '\<br\/\>(.*?)\<br\/\>([\d]+)',
        'width' => '([\d]+) ×',
        'height' => '× ([\d]+)',
        'type' => ' -([\w]+)',
        'size' => ' - ([\d]+)',
        'thumbsrc' => 'src="(.*?)"',
        'thumbwidth' => 'width="([\d]+)"',
        'thumbheight' => 'height="([\d]+)"',
        'domain' => '\<cite title="(.*?)"\>'
    );

    foreach ($cells[2] as $item) {
        $t = new stdClass;
        $t->thumb = new stdClass;

        foreach ($info as $prop => $expr) {
            // Uses its own match variable so the cell list being iterated is not overwritten.
            if (!preg_match('/' . $expr . '/is', $item, $found)) {
                continue;
            }

            // Titles carry <b> highlighting around the matched search terms; drop it.
            $value = 'title' === $prop
                ? str_replace(array('<b>', '</b>'), '', $found[1])
                : $found[1];

            if (0 === strpos($prop, 'thumb')) {
                // Thumb properties go under the thumb object (thumbsrc -> thumb->src).
                $thumbProp = str_replace('thumb', '', $prop);
                $t->thumb->$thumbProp = $value;
            } elseif ('resultLink' === $prop) {
                // Prefix the host exactly once, and only for the link itself.
                $t->resultLink = 'http://images.google.com' . $value;
            } else {
                $t->$prop = $value;
            }
        }

        $retVal[] = $t;
    }

    return $retVal;
}
脚本中究竟是哪一行代码决定了只获取 20 张图片?