我以前参考过这个帖子里的做法,但在测试过程中发现,某些较长的 doctype(文档类型声明)会出问题。有时,开发人员会把 doctype 拆成 2 到 3 行来写。在这种情况下,单纯使用正则表达式并不是最好的方法。
下面贴出一个类,无论 doctype 写在一行还是分成几行,都可以用它来提取:
<?
/**
 * Extracts the <!DOCTYPE ...> declaration from an HTML document and
 * classifies it into a coarse version label.
 *
 * Both steps run in the constructor; the results are read back through
 * getDoctype() and getDoctypeVersion().
 */
class Doctype {
    /** @var string Raw HTML document handed to the constructor. */
    public $html;

    /** @var string Extracted declaration collapsed onto one line, or "" when none was found. */
    public $doctype;

    /** @var string Version label computed by processDoctype(). */
    public $version;

    /**
     * Stores the document, then extracts and classifies its doctype.
     *
     * Declared as __construct because a method named after the class is no
     * longer treated as a constructor by PHP 8, which would leave every
     * property unset.
     *
     * @param string $html Full HTML source to inspect.
     */
    public function __construct($html){
        $this->html = $html;
        $this->extractDoctype();
        $this->processDoctype();
    }

    /**
     * Locates the first "<!doctype" token (case-insensitive) and stores
     * everything up to and including the next ">" in $this->doctype.
     *
     * A declaration spread over several lines is collapsed onto one line:
     * each line break, together with the whitespace around it, becomes a
     * single space so neighbouring tokens are not fused together.
     * $this->doctype is "" when no complete declaration exists.
     */
    private function extractDoctype(){
        $this->doctype = "";
        $html = (string) $this->html;

        $start = stripos($html, "<!doctype");
        if ($start === false) {
            return;
        }

        // Search for the terminator only after the opening token, so that a ">"
        // belonging to an earlier XML declaration or comment is never mistaken
        // for the end of the doctype.
        $end = strpos($html, ">", $start);
        if ($end === false) {
            return;
        }

        // substr() takes a length, not an end offset.
        $declaration = substr($html, $start, $end - $start + 1);

        $singleLine = preg_replace('/\s*[\r\n]+\s*/', ' ', $declaration);
        // preg_replace() yields null on an engine error; keep the raw text then.
        $this->doctype = ($singleLine === null) ? $declaration : $singleLine;
    }

    /**
     * Derives the version label from $this->doctype and stores it in
     * $this->version.
     *
     * Possible labels:
     *  - "HTML5"     for the short form <!doctype html>
     *  - "XHTML"     when the declaration mentions "xhtml"
     *  - "HTML 2.0", "HTML 4.01" or "HTML 3.2" when it mentions "html" and
     *                that version number (checked in this order)
     *  - ""          when it mentions "html" but none of those numbers
     *  - "OTHER"     for anything else, including a missing doctype
     */
    private function processDoctype(){
        // Lower-case once; every test below is then case-insensitive.
        $doctype = strtolower((string) $this->doctype);

        if (preg_match('/<!doctype\s+html\s*>/', $doctype)) {
            $version = "HTML5";
        } else if (strpos($doctype, "xhtml") !== false) {
            $version = "XHTML";
        } else if (strpos($doctype, "html") !== false) {
            $version = "";
            // First match wins; the order keeps the precedence this class has
            // always had when several numbers appear in one declaration.
            foreach (array("2.0", "4.01", "3.2") as $number) {
                if (strpos($doctype, $number) !== false) {
                    $version = "HTML " . $number;
                    break;
                }
            }
        } else {
            $version = "OTHER";
        }

        $this->version = $version;
    }

    /**
     * @return string The extracted doctype declaration, or "" when the document has none.
     */
    public function getDoctype(){
        return $this->doctype;
    }

    /**
     * @return string The version label; see processDoctype() for the possible values.
     */
    public function getDoctypeVersion(){
        return $this->version;
    }
}
?>
https://github.com/jabrena/WTAnalyzer/blob/master/r_php/document/Doctype.class.php