这并不是一个真正的正则表达式方案,但也许能帮到你(未经测试):
/**
 * Remove tracking parameters from the query string of a URL.
 *
 * The query string is filtered pair by pair instead of being round-tripped
 * through parse_str()/http_build_query(), so every parameter that is kept
 * comes out exactly as it went in (parse_str() rewrites dots and spaces in
 * names and collapses repeated keys). A trailing "#fragment" is preserved.
 *
 * @param string   $href     URL to clean, e.g. the value of an href attribute.
 * @param string[] $unwanted Names of the GET parameters to drop.
 *
 * @return string The URL without the unwanted parameters; the input is
 *                returned unchanged when none of them is present.
 */
function stripTrackingParams(
    string $href,
    array $unwanted = ['utm_source', 'utm_medium', 'utm_email', 'utm_report']
): string {
    // Split the fragment off first so a "?" or "&" inside it is never
    // mistaken for part of the query string.
    $url = $href;
    $fragment = '';
    $hashPos = strpos($url, '#');
    if ($hashPos !== false) {
        $fragment = substr($url, $hashPos);
        $url = substr($url, 0, $hashPos);
    }

    $parts = explode('?', $url, 2);
    if (count($parts) < 2) {
        // No query string at all: nothing to clean.
        return $href;
    }
    $base = $parts[0];
    $query = $parts[1];

    $kept = [];
    $removed = false;
    foreach (explode('&', $query) as $pair) {
        $name = urldecode(explode('=', $pair, 2)[0]);

        // "utm_source[]=x" and "utm_source[a]=x" belong to "utm_source" too.
        $bracketPos = strpos($name, '[');
        if ($bracketPos !== false) {
            $name = substr($name, 0, $bracketPos);
        }

        if (in_array($name, $unwanted, true)) {
            $removed = true;
            continue;
        }
        $kept[] = $pair;
    }

    if (!$removed) {
        return $href;
    }

    // Drop the "?" as well when no parameter survives.
    $query = implode('&', $kept);

    return $base . ($query === '' ? '' : '?' . $query) . $fragment;
}

$source = '...'; // your business: the XML/XHTML markup whose links must be cleaned

// The constructor takes an XML version and an encoding, not the markup itself.
$dom = new DOMDocument();

// Collect libxml parse errors ourselves instead of letting them surface as
// PHP warnings, then restore whatever setting was active before.
$previousSetting = libxml_use_internal_errors(true);
// loadXML() rejects an empty string outright, so guard against it.
$loaded = $source !== '' && $dom->loadXML($source);
$parseErrors = libxml_get_errors();
libxml_clear_errors();
libxml_use_internal_errors($previousSetting);

if (!$loaded) {
    $reason = isset($parseErrors[0]) ? trim($parseErrors[0]->message) : 'empty or unreadable input';
    error_log('Could not parse $source as XML: ' . $reason);
    $html = '';
} else {
    $links = $dom->getElementsByTagName('a');
    foreach ($links as $link) {
        // Named anchors (<a name="...">) have no href and must stay that way.
        if (!$link->hasAttribute('href')) {
            continue;
        }

        $href = $link->getAttribute('href');
        $cleaned = stripTrackingParams($href);

        // setAttribute() escapes "&" on serialisation, so the plain separator
        // is the right one here; no "&amp;" by hand.
        if ($cleaned !== $href) {
            $link->setAttribute('href', $cleaned);
        }
    }

    $html = $dom->saveXML();

    // Strip the XML declaration: placed before the doctype it puts IE in
    // quirks mode. Cut up to the closing "?>" rather than a fixed length,
    // because the declaration may also carry an encoding attribute.
    if (strncmp($html, '<?xml', 5) === 0) {
        $declarationEnd = strpos($html, '?>');
        if ($declarationEnd !== false) {
            $html = substr($html, $declarationEnd + 2);
        }
    }
    $html = trim($html);
}

echo $html;