Remove parent element, keep all inner children in DOMDocument with saveHTML

前端 未结 5 2155
情歌与酒
情歌与酒 2021-01-04 20:42

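I'm manipulating a short HTML snippet with XPath; when I output the changed snippet back with $doc->saveHTML(), a DOCTYPE gets added, and HTML / BODY tags are wrapped around the output. I want to remove those wrappers but keep all of their inner children.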

5条回答
  •  轮回少年
    2021-01-04 21:10

    UPDATE

    Here's a version that doesn't extend DOMDocument, though I think extending is the proper approach, since you're trying to achieve functionality that isn't built into the DOM API.

    Note: I'm interpreting "clean" and "without workarounds" as keeping all manipulation to the DOM API. As soon as you hit string manipulation, that's workaround territory.

    What I'm doing, just as in the original answer, is leveraging DOMDocumentFragment to manipulate multiple nodes all sitting at the root level. There is no string manipulation going on, which to me qualifies as not being a workaround.

    // Parse the snippet. loadHTML() wraps it in a full document, which is what
    // the rest of this script strips away again.
    $doc = new DOMDocument();
    $doc->loadHTML('

    Title...

    ...to be one of those crowning achievements...

    ');

    // Remove doctype node (guarded: the property is null when the document has none)
    if ($doc->doctype !== null) {
        $doc->doctype->parentNode->removeChild($doc->doctype);
    }

    // Remove html element, preserving child nodes
    $html = $doc->getElementsByTagName("html")->item(0);
    if ($html !== null) {
        // A fragment lets several sibling nodes take the place of one node.
        $fragment = $doc->createDocumentFragment();
        // appendChild() moves the node, so firstChild advances on every pass.
        while ($html->firstChild !== null) {
            $fragment->appendChild($html->firstChild);
        }
        $html->parentNode->replaceChild($fragment, $html);
    }

    // Remove body element, preserving child nodes
    $body = $doc->getElementsByTagName("body")->item(0);
    if ($body !== null) {
        $fragment = $doc->createDocumentFragment();
        while ($body->firstChild !== null) {
            $fragment->appendChild($body->firstChild);
        }
        $body->parentNode->replaceChild($fragment, $body);
    }

    // Output results
    echo htmlentities($doc->saveHTML());

    ORIGINAL ANSWER

    This solution is rather lengthy, but it's because it goes about it by extending the DOM in order to keep your end code as short as possible.

    sliceOutNode is where the magic happens. Let me know if you have any questions:

    registerNodeClass( "DOMElement", "DOMElementExtended" );
        }
    
        // This method will need to be removed once PHP supports LIBXML_NOXMLDECL
        /**
         * Serialises the document (or a single node) to XML, optionally without
         * the XML declaration.
         *
         * Delegates to the parent saveXML() and, when the LIBXML_NOXMLDECL bit is
         * set in $options, strips the declaration from the returned markup.
         *
         * @param DOMNode|null $node    Node to serialise; null serialises the whole document.
         * @param int          $options Bitmask of LIBXML_* options.
         *
         * @return string|false The XML markup, or false when the parent serialiser fails.
         */
        public function saveXML( DOMNode $node = NULL, $options = 0 )
        {
            $xml = parent::saveXML( $node, $options );

            // Pass a failure through untouched: running false through the regex
            // would silently turn it into an empty string.
            if( $xml === false )
            {
                return false;
            }

            if( $options & LIBXML_NOXMLDECL )
            {
                $xml = $this->stripXMLDeclaration( $xml );
            }

            return $xml;
        }
    
        /**
         * Removes the XML declaration from the start of a serialised document.
         *
         * Only a declaration at the very beginning of the string is removed,
         * together with the single line break (LF, CR or CRLF) that follows it.
         * Other processing instructions whose target merely begins with "xml",
         * such as xml-stylesheet, are left in place, as is any later text that
         * happens to look like a declaration.
         *
         * @param string $xml Serialised XML markup.
         *
         * @return string|null The markup without its declaration; null if the
         *                     regex engine reports an error.
         */
        public function stripXMLDeclaration( $xml )
        {
            // \A anchors to the start of the string; \s after "xml" rules out
            // targets like xml-stylesheet; the s flag lets the lazy .*? cross
            // line breaks inside the declaration.
            return preg_replace( '/\A<\?xml\s.*?\?>(?:\r\n|\r|\n)?/is', "", $xml );
        }
    }
    
    /**
     * DOMElement with helpers for replacing an element in place.
     *
     * Instances are produced by a DOMDocument once this class has been
     * registered for "DOMElement" through registerNodeClass().
     */
    class DOMElementExtended extends DOMElement
    {
        /**
         * Removes this element from the tree while keeping its children, which
         * take its place in the parent in their original order.
         *
         * The children themselves are moved (not cloned), so references the
         * caller already holds to them stay valid; afterwards this element is
         * detached and empty. An element without children is simply removed.
         *
         * @return void
         *
         * @throws DOMException When this element has no parent node.
         */
        public function sliceOutNode()
        {
            // Check first so that nothing is mutated if the call cannot succeed.
            $parent = $this->requireParent();

            $fragment = $this->ownerDocument->createDocumentFragment();

            // appendChild() moves the node, so firstChild advances on every pass.
            while( $this->firstChild !== null )
            {
                $fragment->appendChild( $this->firstChild );
            }

            // Nothing to splice in: avoid handing an empty fragment to replaceChild().
            if( !$fragment->hasChildNodes() )
            {
                $parent->removeChild( $this );
                return;
            }

            $this->replaceNodeWithNode( $fragment );
        }

        /**
         * Replaces this element, in its parent, with the given node.
         *
         * @param DOMNode $node Replacement node (may be a document fragment).
         *
         * @return DOMNode|false Whatever the parent's replaceChild() returns.
         *
         * @throws DOMException When this element has no parent node.
         */
        public function replaceNodeWithNode( DOMNode $node )
        {
            return $this->requireParent()->replaceChild( $node, $this );
        }

        /**
         * Returns the parent node, failing loudly for a detached element.
         *
         * @return DOMNode
         *
         * @throws DOMException When this element has no parent node.
         */
        private function requireParent()
        {
            if( $this->parentNode === null )
            {
                throw new DOMException( "Cannot replace a node that has no parent." );
            }

            return $this->parentNode;
        }
    }
    
    /**
     * Array-backed stand-in for DOMNodeList.
     *
     * Holds a static snapshot of nodes (taken from a DOMNodeList or a plain
     * array) and offers the familiar item() / length accessors plus a helper
     * that copies the nodes into a document fragment.
     */
    class DOMNodeListExtended extends ArrayObject
    {
        /**
         * @param DOMNodeList|array $mixedNodeList Nodes to store.
         *
         * @throws DOMException When given anything other than a DOMNodeList or array.
         */
        public function __construct( $mixedNodeList )
        {
            parent::__construct( array() );

            $this->setNodeList( $mixedNodeList );
        }

        /**
         * Replaces the stored nodes with those from the given list.
         *
         * @param DOMNodeList|array $mixedNodeList Nodes to store.
         *
         * @return void
         *
         * @throws DOMException When given anything other than a DOMNodeList or array.
         */
        private function setNodeList( $mixedNodeList )
        {
            if( is_array( $mixedNodeList ) )
            {
                $this->exchangeArray( $mixedNodeList );
                return;
            }

            if( !( $mixedNodeList instanceof DOMNodeList ) )
            {
                throw new DOMException( "DOMNodeListExtended only supports a DOMNodeList or array as its constructor parameter." );
            }

            // Copy node by node so the result is re-indexed from 0 and no longer
            // tied to the source list.
            $nodes = array();

            foreach( $mixedNodeList as $node )
            {
                $nodes[] = $node;
            }

            $this->exchangeArray( $nodes );
        }

        /**
         * Builds a document fragment holding deep copies of the stored nodes.
         *
         * Each node goes through importNode( $node, true ), so the stored nodes
         * themselves stay where they are.
         *
         * @param DOMDocument $contextDocument Document that will own the fragment.
         *
         * @return DOMDocumentFragment
         */
        public function toFragment( DOMDocument $contextDocument )
        {
            $fragment = $contextDocument->createDocumentFragment();

            foreach( $this as $node )
            {
                $fragment->appendChild( $contextDocument->importNode( $node, true ) );
            }

            return $fragment;
        }

        // Built-in methods of the original DOMNodeList

        /**
         * Returns the node at the given index.
         *
         * @param int $index Zero-based position.
         *
         * @return mixed The stored node, or null when the index does not exist
         *               (no warning is raised, mirroring DOMNodeList::item()).
         */
        public function item( $index )
        {
            if( !$this->offsetExists( $index ) )
            {
                return null;
            }

            return $this->offsetGet( $index );
        }

        /**
         * Emulates the read-only "length" property of DOMNodeList.
         *
         * @param string $name Property being read.
         *
         * @return int|false The number of stored nodes for "length"; false for
         *                   any other property name.
         */
        public function __get( $name )
        {
            if( $name == "length" )
            {
                return $this->count();
            }

            return false;
        }
    }
    
    // Load HTML/XML using our fancy DOMDocumentExtended class
    $doc = new DOMDocumentExtended();
    $doc->loadHTML('

    Title...

    ...to be one of those crowning achievements...

    ');

    // Remove doctype node (guarded: the property is null when the document has none)
    if( $doc->doctype !== null )
    {
        $doc->doctype->parentNode->removeChild( $doc->doctype );
    }

    // Slice out html node
    $html = $doc->getElementsByTagName("html")->item(0);
    if( $html !== null )
    {
        $html->sliceOutNode();
    }

    // Slice out body node
    $body = $doc->getElementsByTagName("body")->item(0);
    if( $body !== null )
    {
        $body->sliceOutNode();
    }

    // Pick your poison: XML or HTML output
    echo htmlentities( $doc->saveXML( NULL, LIBXML_NOXMLDECL ) );
    echo htmlentities( $doc->saveHTML() );

提交回复
热议问题