我想使用站点 url 获取站点标题,其中大部分站点都在工作,但是它在日语和中文站点上获得了一些不可读的文本。
这是我的功能
function file_get_contents_curl($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
利用
use--------
$html = $this->file_get_contents_curl($url);
解析
$doc = new DOMDocument();
@$doc->loadHTML($html);
$nodes = $doc->getElementsByTagName('title');
$title = $nodes->item(0)->nodeValue;
我得到这个输出“ã¢ã¡ã¼ãIDç»é²ã¡ã¼ã«ã®ç¢ºèªï½Ameba(ã¢ã¡ã¼ã)”
网站网址:https ://user.ameba.jp/regist/registerIntro.do?campaignId=0053&frmid=3051
请帮我提出一些方法来获得任何语言的确切网站标题。
//例子
/* MEthod----------4 */
function file_get_contents_curl($url){
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
$uurl="http://www.piaohua.com/html/xuannian/index.html";
$html = file_get_contents_curl($uurl);
//parsing begins here:
$doc = new DOMDocument();
@$doc->loadHTML($html);
$nodes = $doc->getElementsByTagName('title');
//get and display what you need:
if(!empty($nodes->item(0)->nodeValue)){
$title = utf8_decode($nodes->item(0)->nodeValue);
}else{
$title =$uurl;
}
echo $title;