更新
这是一个不扩展 DOMDocument 的版本,尽管我认为扩展是正确的方法,因为您正在尝试实现 DOM API 未内置的功能。
注意:我将“干净”和“没有变通办法”理解为所有操作都通过 DOM API 完成。一旦开始进行字符串操作,那就属于变通办法的范畴了。
与原始答案一样,我的做法是利用 DOMDocumentFragment 来操作同处于根级别的多个节点。整个过程没有进行任何字符串操作,因此在我看来这不算变通办法。
// Parse the sample markup. The steps below expect the parsed document to
// contain a doctype node plus <html> and <body> wrapper elements.
$doc = new DOMDocument();
$doc->loadHTML('<p><strong>Title...</strong></p><a href="http://www....."><img src="http://" alt=""></a><p>...to be one of those crowning achievements...</p>');

// Detach the doctype node from its parent.
$doctype = $doc->doctype;
$doctype->parentNode->removeChild($doctype);

// Unwrap <html> first, then <body>: move every child of the wrapper into a
// document fragment and put that fragment where the wrapper used to be, so
// the children survive while the wrapper element itself disappears.
foreach (array("html", "body") as $wrapperTag) {
    $wrapper = $doc->getElementsByTagName($wrapperTag)->item(0);
    $children = $doc->createDocumentFragment();
    // appendChild() moves the node, so firstChild advances on each pass.
    while ($wrapper->firstChild !== null) {
        $children->appendChild($wrapper->firstChild);
    }
    $wrapper->parentNode->replaceChild($children, $wrapper);
}

// Print the serialized result, entity-escaped so the markup is shown as text.
$markup = $doc->saveHTML();
echo htmlentities($markup);
原始答案
这个解决方案相当冗长,这是因为它通过扩展 DOM 类来实现该功能,从而让您最终的调用代码尽可能简短。
sliceOutNode 方法是整个方案的核心所在。如果您有任何问题,请告诉我:
<?php
/**
 * DOMDocument subclass that registers DOMElementExtended as the element class
 * and lets saveXML() honour LIBXML_NOXMLDECL by stripping the XML declaration
 * from the serialized string.
 */
class DOMDocumentExtended extends DOMDocument
{
    /**
     * @param string $version  XML version passed to DOMDocument's constructor.
     * @param string $encoding Document encoding passed to DOMDocument's constructor.
     */
    public function __construct( $version = "1.0", $encoding = "UTF-8" )
    {
        parent::__construct( $version, $encoding );
        // Elements created by this document become DOMElementExtended instances.
        $this->registerNodeClass( "DOMElement", "DOMElementExtended" );
    }

    /**
     * Serializes the document (or a single node) to XML, removing the leading
     * XML declaration when LIBXML_NOXMLDECL is set in $options.
     *
     * This method will need to be removed once PHP supports LIBXML_NOXMLDECL.
     * NOTE(review): newer PHP releases appear to honour LIBXML_NOXMLDECL
     * natively in saveXML() - confirm against the targeted PHP version.
     *
     * @param DOMNode|null $node    Node to serialize, or NULL for the whole document.
     * @param int          $options libxml option bit mask.
     * @return string|false Whatever the parent returns; a string result has
     *                      its declaration stripped when requested.
     */
    // On PHP 8.1+ this attribute silences the deprecation raised when an
    // internal method is overridden without a declared return type; older PHP
    // versions parse the line as an ordinary '#' comment.
    #[\ReturnTypeWillChange]
    public function saveXML( DOMNode $node = NULL, $options = 0 )
    {
        $xml = parent::saveXML( $node, $options );
        // Only post-process real output: a failed save (false) must stay false
        // instead of being turned into an empty string by preg_replace().
        if( is_string( $xml ) && ( $options & LIBXML_NOXMLDECL ) )
        {
            $xml = $this->stripXMLDeclaration( $xml );
        }
        return $xml;
    }

    /**
     * Removes the XML declaration from the very start of $xml, together with
     * the single line break that follows it.
     *
     * Only a declaration at the start of the string is removed, so processing
     * instructions whose target merely begins with "xml" (for example
     * <?xml-stylesheet ...?>) are left intact.
     *
     * @param string $xml Serialized XML.
     * @return string The input without its leading XML declaration.
     */
    public function stripXMLDeclaration( $xml )
    {
        // \A anchors to the start of the string; "\s" after "xml" keeps
        // targets such as xml-stylesheet from matching; the trailing group
        // consumes one complete line ending, including "\r\n".
        return preg_replace( '/\A<\?xml\s.*?\?>(?:\r\n|\r|\n)?/is', "", $xml, 1 );
    }
}
/**
 * DOMElement subclass that can remove itself from the tree while leaving its
 * children in its place.
 */
class DOMElementExtended extends DOMElement
{
    /**
     * Replaces this element with a fragment built from its child nodes, so
     * the element disappears and its content takes its position.
     *
     * The fragment is produced by DOMNodeListExtended::toFragment(), which
     * (as defined in this file) imports each child with importNode( ..., true ).
     *
     * @throws DOMException If this element has no parent node.
     */
    public function sliceOutNode()
    {
        // Fail early, before any children are copied into a fragment.
        $this->requireParentNode();
        $children = new DOMNodeListExtended( $this->childNodes );
        $fragment = $children->toFragment( $this->ownerDocument );
        $this->replaceNodeWithNode( $fragment );
    }

    /**
     * Replaces this element with $node in this element's parent.
     *
     * @param DOMNode $node Replacement node.
     * @return mixed The result of the parent's replaceChild() call.
     * @throws DOMException If this element has no parent node.
     */
    public function replaceNodeWithNode( DOMNode $node )
    {
        return $this->requireParentNode()->replaceChild( $node, $this );
    }

    /**
     * Returns the parent node, or throws a catchable exception for a detached
     * element instead of failing with a "member function on null" error.
     *
     * @return DOMNode
     * @throws DOMException If this element has no parent node.
     */
    private function requireParentNode()
    {
        $parent = $this->parentNode;
        if( $parent === null )
        {
            throw new DOMException( "Cannot replace a node that has no parent node.", DOM_NOT_FOUND_ERR );
        }
        return $parent;
    }
}
/**
 * Array-backed node collection built from a DOMNodeList or a plain array.
 * It exposes item() and a read-only "length" property like DOMNodeList, and
 * can turn its contents into a DOMDocumentFragment.
 */
class DOMNodeListExtended extends ArrayObject
{
    /**
     * @param DOMNodeList|array $mixedNodeList Nodes to store.
     * @throws DOMException If $mixedNodeList is neither a DOMNodeList nor an array.
     */
    public function __construct( $mixedNodeList )
    {
        parent::__construct( array() );
        $this->setNodeList( $mixedNodeList );
    }

    /**
     * Replaces the stored nodes with the contents of $mixedNodeList.
     *
     * @param DOMNodeList|array $mixedNodeList Nodes to store.
     * @throws DOMException If $mixedNodeList is neither a DOMNodeList nor an array.
     */
    private function setNodeList( $mixedNodeList )
    {
        if( $mixedNodeList instanceof DOMNodeList )
        {
            // Copy the nodes into a plain array so this collection holds its
            // own list rather than the DOMNodeList object itself.
            $nodes = array();
            foreach( $mixedNodeList as $node )
            {
                $nodes[] = $node;
            }
            $this->exchangeArray( $nodes );
        }
        elseif( is_array( $mixedNodeList ) )
        {
            $this->exchangeArray( $mixedNodeList );
        }
        else
        {
            throw new DOMException( "DOMNodeListExtended only supports a DOMNodeList or array as its constructor parameter." );
        }
    }

    /**
     * Builds a document fragment containing a deep import of every stored node.
     *
     * @param DOMDocument $contextDocument Document that creates the fragment
     *                                     and imports the nodes.
     * @return DOMDocumentFragment
     */
    public function toFragment( DOMDocument $contextDocument )
    {
        $fragment = $contextDocument->createDocumentFragment();
        foreach( $this as $node )
        {
            $fragment->appendChild( $contextDocument->importNode( $node, true ) );
        }
        return $fragment;
    }

    // Built-in methods of the original DOMNodeList

    /**
     * Returns the entry stored at $index, or null when there is none.
     *
     * @param int|string $index Position (or key) to look up.
     * @return mixed|null
     */
    public function item( $index )
    {
        // Check first: offsetGet() on a missing key raises an "undefined
        // index/array key" notice or warning instead of quietly yielding null.
        return $this->offsetExists( $index ) ? $this->offsetGet( $index ) : null;
    }

    /**
     * Emulates the read-only "length" property.
     *
     * @param string $name Property being read.
     * @return int|false The element count for "length", false for any other name.
     */
    public function __get( $name )
    {
        if( $name === "length" )
        {
            return $this->count();
        }
        return false;
    }
}
// Parse the sample markup with DOMDocumentExtended so that the elements it
// creates are DOMElementExtended instances offering sliceOutNode().
$doc = new DOMDocumentExtended();
$doc->loadHTML('<p><strong>Title...</strong></p><a href="http://www....."><img src="http://" alt=""></a><p>...to be one of those crowning achievements...</p>');

// Detach the doctype node from its parent.
$doctype = $doc->doctype;
$doctype->parentNode->removeChild( $doctype );

// Slice out the wrapper elements in order: <html> first, then <body>.
foreach( array( "html", "body" ) as $wrapperTag )
{
    $wrapper = $doc->getElementsByTagName( $wrapperTag )->item( 0 );
    $wrapper->sliceOutNode();
}

// Print both serializations, entity-escaped: XML without the declaration
// first, then HTML. Keep whichever one you need.
$asXml = $doc->saveXML( NULL, LIBXML_NOXMLDECL );
echo htmlentities( $asXml );
$asHtml = $doc->saveHTML();
echo htmlentities( $asHtml );