How to use XPath on TXMLDocument which has namespace prefixes?

前端 未结 5 2031
猫巷女王i
猫巷女王i 2021-01-03 07:35

I have an XML packet received from a third-party web server:




        
相关标签:
5条回答
  • 2021-01-03 07:40

    When I tried this a couple of years ago, I found namespace lookup in XPath was different between xml providers.

    If I remember correctly, MSXML lets you just use the namespace prefixes as they are defined in the XML file.

    The ADOM 4 provider requires that you resolve namespace prefixes used in your XPath query to the actual namespaces, independent of the namespace mapping used in the xml file. There is a method pointer for that purpose, OnOx4XPathLookupNamespaceURI. Then you can have a name lookup function like this:

    { Namespace lookup callback for XPath queries: maps a prefix used in the
      query to its namespace URI. Only the 'soap' and 'some' prefixes are
      known; for any other prefix ANamespaceURI is left exactly as the caller
      passed it in. AContextNode is not consulted. }
    procedure TTestXmlUtil.EventLookupNamespaceURI(
      const AContextNode: IDomNode; const APrefix: WideString;
      var ANamespaceURI: WideString);
    const
      cSoapPrefix = 'soap';
      cSoapUri    = 'http://schemas.xmlsoap.org/soap/envelope/';
      cSomePrefix = 'some';
      cSomeUri    = 'http://someurl';
    begin
      if APrefix = cSoapPrefix then
      begin
        ANamespaceURI := cSoapUri;
        Exit;
      end;

      if APrefix = cSomePrefix then
        ANamespaceURI := cSomeUri;
    end;
    

    Using this lookup function, and the selectNode function (which looks like something I may have once posted in a Delphi forum, taken from https://github.com/Midiar/adomxmldom/blob/master/xmldocxpath.pas), I could do the following test (using your xml in a string constant):

    { Test fixture setup: selects the DOM vendor, loads the sample SOAP
      document into docFull and installs the namespace lookup handler. }
    procedure TTestXmlUtil.SetUp;
    begin
      inherited;
      // Vendor is chosen before the document is loaded; presumably LoadXmlData
      // creates the document with whatever DefaultDOMVendor names at that time.
      DefaultDOMVendor := sAdom4XmlVendor;
      // csSoapXml is the XML packet under test, held in a string constant.
      docFull := LoadXmlData(csSoapXml);
    
      // Hook used to resolve the prefixes that appear in XPath queries.
      OnOx4XPathLookupNamespaceURI := EventLookupNamespaceURI;
    end;
    
    { Checks that a namespace-qualified XPath evaluated from the document
      element of docFull yields a node. }
    procedure TTestXmlUtil.Test_selectNode;
    const
      cResultPath = '/soap:Envelope/soap:Body/some:SomeResponse/some:SomeResult';
    var
      found: IXmlNode;
    begin
      found := selectNode(docFull.DocumentElement, cResultPath);
      CheckNotNull(found, 'selectNode returned nil');
    end;
    

    I had to modify your XPath query a little to account for the default namespace.

    0 讨论(0)
  • 2021-01-03 07:42

    Do not try to include namespaces in your XPath query. If all you want is the text of the SomeResult node, then you can use '//SomeResult' as the query. For some reason the default XML implementation (MSXML) barfs on the default namespace xmlns="http://someurl" on the SomeResponse parent node. However, using OmniXML as the DOMVendor (cross-platform and available from XE7 - thanks to @gabr), this works:

    program Project3;
    
    {$APPTYPE CONSOLE}
    
    {$R *.res}
    
    uses
      Xml.XmlIntf,
      Xml.XMLDoc,
      Xml.XMLDom,
      Xml.omnixmldom,
      System.SysUtils;
    
    const
     xml = '<?xml version="1.0" encoding="utf-8"?>'+#13#10+
            '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'+#13#10+
            'xmlns:xsd="http://www.w3.org/2001/XMLSchema"'+#13#10+
            'xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'+#13#10+
            ' <soap:Body>'+#13#10+
            '  <SomeResponse xmlns="http://tempuri.org">'+#13#10+
            '   <SomeResult>1</SomeResult>'+#13#10+
            '  </SomeResponse>'+#13#10+
            ' </soap:Body>'+#13#10+
            '</soap:Envelope>';
    
    { Evaluates the XPath expression nodePath relative to xnRoot and returns
      the selected node wrapped as an IXmlNode.
      Returns nil when xnRoot is nil, when its DOM node does not implement
      IDomNodeSelect, or when the query selects nothing. }
    function selectNode(xnRoot: IXmlNode; const nodePath: WideString): IXmlNode;
    var
      selector: IDomNodeSelect;
      domHit: IDomNode;
      docAccess: IXmlDocumentAccess;
      ownerDoc: TXmlDocument;
    begin
      Result := nil;

      if not Assigned(xnRoot) then
        Exit;
      if not Supports(xnRoot.DOMNode, IDomNodeSelect, selector) then
        Exit;

      domHit := selector.selectNode(nodePath);
      if not Assigned(domHit) then
        Exit;

      // The wrapper node is tied to the owning TXmlDocument when it is reachable.
      ownerDoc := nil;
      if Supports(xnRoot.OwnerDocument, IXmlDocumentAccess, docAccess) then
        ownerDoc := docAccess.DocumentObject;

      Result := TXmlNode.Create(domHit, nil, ownerDoc);
    end;
    
    { Runs Query via SelectNode from the document element of Doc and returns
      the Text of the selected node, or an empty string when nothing is
      selected. }
    function XPathQuery(Doc : IXMLDocument; Query : String) : String;
    var
      match : IXMLNode;
    begin
      match := SelectNode(Doc.DocumentElement, Query);
      if not Assigned(match) then
        Exit('');
      Result := match.Text;
    end;
    
    var
      SoapDoc : IXMLDocument;

    begin
      // Select OmniXML as the DOM vendor before the document is created.
      DefaultDOMVendor := sOmniXmlVendor;
      SoapDoc := TXMLDocument.Create(nil);
      try
        SoapDoc.LoadFromXML(Xml);
        // Echo the loaded document, then the text found by the XPath query.
        Writeln(SoapDoc.XML.Text);
        Writeln(XPathQuery(SoapDoc, '//SomeResult'));
      except
        on Err: Exception do
          Writeln(Err.ClassName, ': ', Err.Message);
      end;
      // Drop the interface reference before waiting for the user.
      SoapDoc := nil;
      Readln;
    end.
    
    0 讨论(0)
  • 2021-01-03 07:44

    One solution could be to remove all namespaces before you start processing your XML:

    { Applies an XSLT stylesheet to XMLString that re-creates every element
      and attribute under its local name, and returns the transformed
      document as a string.
      If any step raises an exception, the exception message is returned as
      the function result instead of XML. }
    class function TXMLHelper.RemoveNameSpaces(XMLString: String): String;
    const
      // XSLT that rebuilds the document using local-name() for elements and
      // attributes. Source: http://wiki.tei-c.org/index.php/Remove-Namespaces.xsl
      cRemoveNSTransform =
        '<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">' +
        '<xsl:output method="xml" encoding="utf-8"/>' +
    
        '<xsl:template match="/|comment()|processing-instruction()">' +
        '    <xsl:copy>' +
        '      <xsl:apply-templates/>' +
        '    </xsl:copy>' +
        '</xsl:template>' +
    
        '<xsl:template match="*">' +
        '    <xsl:element name="{local-name()}">' +
        '      <xsl:apply-templates select="@*|node()"/>' +
        '    </xsl:element>' +
        '</xsl:template>' +
    
        '<xsl:template match="@*">' +
        '    <xsl:attribute name="{local-name()}">' +
        '      <xsl:value-of select="."/>' +
        '    </xsl:attribute>' +
        '</xsl:template>' +
    
        '</xsl:stylesheet>';
    var
      Source, Stylesheet, Output: IXMLDocument;
      Utf8Xml: UTF8String;
    begin
      try
        Source := LoadXMLData(XMLString);
        Stylesheet := LoadXMLData(cRemoveNSTransform);
        Output := NewXMLDocument;
        // TransformNode takes the stylesheet as IXMLNode and the target as IXMLDocument.
        Source.Node.TransformNode(Stylesheet.Node, Output);
        // Per the original author, saving into a UTF8String keeps the encoding utf-8.
        Output.SaveToXML(Utf8Xml);
        Result := String(Utf8Xml);
      except
        on E: Exception do
          Result := E.Message;
      end;
    end;
    

    (TXMLHelper is a helper class that I have with some useful XML handling functions)

    0 讨论(0)
  • 2021-01-03 07:45

    As others have pointed out, different vendors handle namespaces differently. Here is an example using MSXML (the windows default) DOMVendor: (which I DO realise is not exactly what the OP was asking, but I felt it was worth documenting)

    XML:

    <?xml version="1.0" encoding="utf-8"?>
    <soap:Envelope 
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
      <soap:Body>
        <SomeResponse xmlns="http://someurl">
          <SomeResult>
            Some result here
          </SomeResult>
        </SomeResponse>
      </soap:Body>
    </soap:Envelope>
    

    Selection code (for completeness)

    // Adapted from a post in Embarcadero's Delphi XML forum.
    // Evaluates the XPath expression nodePath against xnRoot and wraps the
    // selected DOM node in an IXmlNode. Yields nil if xnRoot is nil, if its DOM
    // node does not support IDomNodeSelect, or if nothing is selected.
    function selectNode(xnRoot: IXmlNode; const nodePath: WideString): IXmlNode;
    var
      nodeSelect: IDomNodeSelect;
      selected: IDomNode;
      docAccess: IXmlDocumentAccess;
      owner: TXmlDocument;
    begin
      Result := nil;
      if Assigned(xnRoot) and Supports(xnRoot.DOMNode, IDomNodeSelect, nodeSelect) then
      begin
        selected := nodeSelect.selectNode(nodePath);
        if not Assigned(selected) then
          Exit;

        // Use the owning TXmlDocument for the wrapper when it can be obtained.
        owner := nil;
        if Supports(xnRoot.OwnerDocument, IXmlDocumentAccess, docAccess) then
          owner := docAccess.DocumentObject;

        Result := TXmlNode.Create(selected, nil, owner);
      end;
    end;
    

    Actual setting of XML search namespaces:

    uses Winapi.MSXMLIntf; // NOTE: Use this version of the interface. MSXML2_TLB won't work.
    ...
    { Registers the namespace prefixes used by the XPath query on the
      underlying MSXML document (when XmlDocument1 is backed by MSXML), runs
      the query from the document element and reports the outcome with
      ShowMessage. }
    procedure TForm1.DoExampleSearch;
    const
      // Prefix declarations handed to MSXML; 'some' is a prefix made up for
      // the query to address the document's default namespace.
      cSelectionNamespaces = 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '+
                             'xmlns:xsd="http://www.w3.org/2001/XMLSchema" '+
                             'xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" '+
                             'xmlns:some="http://someurl"';
      cResultPath = '/soap:Envelope/soap:Body/some:SomeResponse/some:SomeResult';
    var
      hit: IXmlNode;
      msxmlDoc: IXmlDomDocument2;
      vendorDoc: TMSDOMDocument;
    begin
      if Xmldocument1.DOMDocument is TMSDOMDocument then
      begin
        vendorDoc := Xmldocument1.DOMDocument as TMSDOMDocument;
        msxmlDoc := vendorDoc.MSDocument as IXMLDOMDocument2;
        msxmlDoc.setProperty('SelectionLanguage', 'XPath');
        msxmlDoc.setProperty('SelectionNamespaces', cSelectionNamespaces);
      end;

      // The query is run even when the document is not MSXML-backed.
      hit := selectNode(XmlDocument1.DocumentElement, cResultPath);
      if hit = nil then
        showmessage('Not found')
      else
        showmessage('Found: ' + hit.Text);
    end;
    

    A couple of things worth noting: once you add namespaces into the mix at all, XPath seems to insist on them for everything. Note that I added a 'some' namespace prefix for the search criteria, because SomeResult inherits the default namespace from its parent, and I have yet to get XPath to implicitly handle default namespaces.

    0 讨论(0)
  • 2021-01-03 07:51

    The OmniXML solution:

    I can absolutely confirm the OmniXML XPath does NOT support namespaces per se.

    BUT:

    since it treats the nodenames as literals, 'soap:Envelope' will work in a query PROVIDED the name in the xml document IS soap:Envelope. So in the OP example, the OmniXML search path '/soap:Envelope/soap:Body/SomeResponse/SomeResult' would work.

    Note that you can absolutely NOT rely on inherited or default namespaces, OmniXML matches on the literal nodename.

    You could fairly easily implement a loop to either remove or normalize all namespace tags in your document without too much effort.

    0 讨论(0)
提交回复
热议问题