请参考下面的代码:已在 PHP 5.3 上测试,工作正常,并添加了代码注释。
<?php
// Sample markup used as input: a mix of whitelisted tags, non-whitelisted
// tags, and attributes that should and should not survive.
$data = 'br1<br>,br3<br >, br4<br/>,br2<br />, <i>i1</i><b>b1</b> foo<b onmouseover="alert(hi);">b1</b><u tricky="hello">u1</u> <big>big1</big><small>small1</small> <sub>sub1</sub><div name="aaaa">div1</div> <sup>sup1</sup><font>font1</font> <font >font2</font><font onmouseover="alert(\'hi\');" color="red" style="background-color:green;">font3</font><font onmouseover="alert(\'hi\');" color="red" style="background-color:green;" >font4</font><ul><li>li1</li></ul><ol><li>li2</li></ol>';

// Whitelist of tags, keyed by tag name (case-insensitive).
// The value is a comma-separated list of attributes permitted on that tag;
// an empty string means the tag's attributes are not filtered at all.
$allowed = array(
    "br"    => "",
    "i"     => "",
    "b"     => "",
    "u"     => "",
    "big"   => "",
    "small" => "",
    "sub"   => "",
    "sup"   => "",
    "font"  => "color,style", // every other attribute on <font> is dropped
    "ul"    => "",
    "ol"    => "",
    "li"    => "",
);

// Buffer that receives the processed markup.
$outdata = '';

// Process $data into $outdata, then print the result.
fixit();
echo $outdata;
/**
 * Start-tag callback for the XML parser: appends the opening tag to the
 * global $outdata buffer.
 *
 * A tag whose (upper-cased) name is a key of the global $allowed map is
 * written as real markup, keeping only the attributes listed for it. Any other
 * tag is written HTML-encoded, so it is displayed as text instead of being
 * interpreted.
 *
 * $allowed is expected in the normalised shape fixit() produces:
 * UPPER-CASED tag name => array of UPPER-CASED attribute names. An empty array
 * disables attribute filtering for that tag, i.e. every attribute is kept
 * (event-handler attributes such as onmouseover included).
 *
 * @param mixed  $parser Parser handle passed by the XML extension (unused).
 * @param string $name   Element name as delivered by the parser.
 * @param array  $attrs  Attribute name => attribute value pairs.
 */
function start_element_handler($parser, $name, $attrs) {
    global $outdata, $allowed;

    $tag = strtolower($name);

    if (array_key_exists($name, $allowed)) {
        $attrout = "";
        foreach ((array) $attrs as $attr => $val) {
            // Keep the attribute when the tag has no filter list or the list names it.
            if (empty($allowed[$name]) || in_array($attr, $allowed[$name], true)) {
                // The parser hands values over with entities already decoded, so a
                // value may contain a raw quote or angle bracket. Escape it before
                // placing it inside ="..." so it cannot close the attribute early
                // and smuggle in extra attributes or markup.
                $safeVal = htmlspecialchars($val, ENT_QUOTES, 'UTF-8');
                $attrout .= " " . strtolower($attr) . "=\"" . $safeVal . "\" ";
            }
        }
        $outdata .= "<" . $tag . $attrout . ">";
        return;
    }

    // Tag not whitelisted: rebuild the complete tag and encode it as one string,
    // which also neutralises anything inside its attribute values.
    $attrout = "";
    foreach ((array) $attrs as $attr => $val) {
        $attrout .= " " . strtolower($attr) . "=\"" . $val . "\" ";
    }
    $outdata .= htmlentities("<" . $tag . $attrout . ">", ENT_COMPAT, 'UTF-8');
}
/**
 * End-tag callback for the XML parser: appends the closing tag to the global
 * $outdata buffer.
 *
 * Nothing is written for HTML void elements. For every other element the
 * lower-cased closing tag is written verbatim when the name is a key of the
 * global $allowed map, and HTML-encoded otherwise.
 *
 * @param mixed  $parser Parser handle passed by the XML extension (unused).
 * @param string $name   Element name as delivered by the parser.
 */
function end_element_handler($parser, $name) {
    global $outdata, $allowed;

    // Elements that have no closing tag in HTML.
    $voidElements = array("AREA", "BASE", "BR", "COL", "COMMAND", "EMBED", "HR", "IMG", "INPUT", "KEYGEN", "LINK", "META", "PARAM", "SOURCE", "TRACK", "WBR");

    if (!in_array($name, $voidElements)) {
        $closingTag = "</" . strtolower($name) . ">";
        $isWhitelisted = in_array($name, array_keys($allowed));

        // Whitelisted: real markup. Otherwise: encoded so it renders as text.
        $outdata .= $isWhitelisted
            ? $closingTag
            : htmlentities($closingTag, ENT_COMPAT, 'UTF-8');
    }
}
/**
 * Default callback for the XML parser: HTML-encodes the chunk it is given and
 * appends it to the global $outdata buffer.
 *
 * @param mixed  $parser Parser handle passed by the XML extension (unused).
 * @param string $data   Chunk of input handed over by the parser.
 */
function default_handler($parser, $data) {
    global $outdata, $allowed;

    $encodedChunk = htmlentities($data, ENT_COMPAT, 'UTF-8');
    $outdata .= $encodedChunk;
}
/**
 * Runs the global $data through the tag/attribute whitelist and stores the
 * result in the global $outdata.
 *
 * Steps:
 *  1. Normalise the global $allowed map to
 *     UPPER-CASED tag => array of UPPER-CASED attribute names, the shape the
 *     element handlers compare against. The normalised map is written back to
 *     the global; normalising an already-normalised map leaves it unchanged,
 *     so the function can be called repeatedly.
 *  2. Wrap a copy of $data in a temporary <body> element so the parser sees a
 *     single root element. The global $data itself is left untouched.
 *  3. Parse the wrapped copy; start_element_handler, end_element_handler and
 *     default_handler append to $outdata as the parser calls them.
 *  4. Remove the temporary wrapper from $outdata again.
 *
 * The return value of xml_parse() is not checked: if the input cannot be
 * parsed, $outdata holds whatever the handlers appended up to that point.
 */
function fixit() {
    global $data, $allowed, $outdata;

    $outdata = "";

    // Upper-case tags and attribute names for the comparisons done in the
    // handlers. Values may be a comma-separated string (first call) or an
    // array (already normalised by an earlier call).
    $normalised = array();
    foreach (array_change_key_case($allowed, CASE_UPPER) as $tag => $attrs) {
        if (!is_array($attrs)) {
            $attrs = explode(",", (string) $attrs);
        }
        // trim() tolerates lists written as "color, style"; array_filter()
        // drops the empty entries an empty list produces.
        $normalised[$tag] = array_filter(array_map('trim', array_map('strtoupper', $attrs)));
    }
    $allowed = $normalised;

    // Temporary root element so the parser accepts a markup fragment.
    $wrapped = "<body>{$data}</body>";

    $parser = xml_parser_create('UTF-8');
    // Element and attribute names are reported upper-cased.
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, true);
    xml_set_element_handler($parser, 'start_element_handler', 'end_element_handler');
    xml_set_default_handler($parser, 'default_handler');
    xml_parse($parser, $wrapped, true);
    // Only resource handles need freeing; on PHP 8+ the parser is an object
    // and xml_parser_free() has no effect.
    if (is_resource($parser)) {
        xml_parser_free($parser);
    }

    // Remove the temporary wrapper. When BODY is whitelisted the handlers emit
    // it as real markup ...
    $outdata = str_ireplace(array('<body>', '</body>'), "", $outdata);

    // ... otherwise they emit it HTML-encoded, which the replacement above
    // cannot match. Strip exactly one leading and one trailing encoded tag so
    // body tags that were part of the input itself stay visible.
    if (!array_key_exists('BODY', $allowed)) {
        $openTag = htmlentities('<body>', ENT_COMPAT, 'UTF-8');
        $closeTag = htmlentities('</body>', ENT_COMPAT, 'UTF-8');

        if (strncmp($outdata, $openTag, strlen($openTag)) === 0) {
            // Cast: substr() can return false instead of "" on old PHP versions.
            $outdata = (string) substr($outdata, strlen($openTag));
        }
        if (strlen($outdata) >= strlen($closeTag)
            && substr($outdata, -strlen($closeTag)) === $closeTag) {
            $outdata = (string) substr($outdata, 0, -strlen($closeTag));
        }
    }
}
?>