After answering a question about how to read unknown JSON generically, I tried to find something similar for XML (triggered by this related question).
The question is: how can XML of unknown structure be read generically?
In the following I create an XML document with several namespaces, one multi-text element and various nestings, repetitions, name clashes and attributes. This should cover most real-world scenarios.
Hint: It's easy to wrap this as inline TVF and call it as a one liner, passing the XML as parameter.
--Test document. The markup was reconstructed from the documented result further down:
--a default namespace plus two prefixed ones, attributes with and without a namespace,
--mixed content (multiText), repeated element names and the same local name in two namespaces.
--The prefixes used here (d1, so) are arbitrary; the query generates its own (ns1, ns2, ...).
--The empty element inside multiText only separates the 2nd and 3rd text node.
DECLARE @xml XML=
N'<root xmlns="defaultNs" xmlns:d1="dummy1" xmlns:so="SomeOther">
  <d1:level1 test1="test1" d1:test2="test2" so:test3="test3">
    <so:InnerElement>Some inner element</so:InnerElement>
    <multiText>text1<someInner>blah</someInner>text2<emptyInner/>text3</multiText>
    <repeating>rep 1</repeating>
    <repeating r2="r2">rep 2</repeating>
    <so:repeating r4="r4">rep 4</so:repeating>
    <level2>
      <level3/>
      <level3>
        <content>Content in second level3 element</content>
      </level3>
    </level2>
    <repeating oneMore="oneMore">one more</repeating>
  </d1:level1>
</root>';
--the query
--Shreds @xml into one row per attribute and per non-empty text() node and builds,
--for each row, an XPath that addresses exactly this node. The XPaths use generated
--prefixes (ns1, ns2, ...), one per namespace URI found in the document.
WITH AllNamespaces AS
(
    --One generated prefix per distinct, non-empty namespace URI.
    --Attributes are scanned as well as elements, so a namespace that is used
    --by attributes only still gets a prefix (and a correct XPath).
    SELECT CONCAT('ns',ROW_NUMBER() OVER(ORDER BY U.namespaceUri)) AS Prefix
          ,U.namespaceUri
    FROM
    (
        SELECT A.nd.value('namespace-uri(.)','nvarchar(max)') AS namespaceUri
        FROM @xml.nodes('//*') A(nd)
        UNION ALL
        SELECT A.nd.value('namespace-uri(.)','nvarchar(max)')
        FROM @xml.nodes('//@*') A(nd)
    ) U
    WHERE LEN(U.namespaceUri)>0
    GROUP BY U.namespaceUri
)
,recCte AS
(
    --Anchor: the top-level element(s).
    --SortString is the zero-padded (5 digits) position; children append their own
    --position, so sorting by SortString walks the tree parent-first.
    --ns.Prefix+':' is NULL for an element without namespace and CONCAT skips NULL
    --arguments, so such an element simply gets no prefix.
    SELECT 1 AS RecursionLevel
          ,1 AS NodeType
          ,ROW_NUMBER() OVER(ORDER BY A.nd) AS ElementPosition
          ,CAST(REPLACE(STR(ROW_NUMBER() OVER(ORDER BY A.nd),5),' ','0') AS VARCHAR(900)) COLLATE DATABASE_DEFAULT AS SortString
          ,ns.Prefix AS CurrentPrefix
          ,ns.namespaceUri AS CurrentUri
          ,CONCAT(ns.Prefix+':',A.nd.value('local-name(.)','nvarchar(max)')
                 ,'[',ROW_NUMBER() OVER(PARTITION BY CONCAT(ns.Prefix+':',A.nd.value('local-name(.)','nvarchar(max)')) ORDER BY A.nd),']') AS FullName
          ,CAST(CONCAT('/',ns.Prefix+':',A.nd.value('local-name(.)','nvarchar(max)')
                      ,'[',ROW_NUMBER() OVER(PARTITION BY CONCAT(ns.Prefix+':',A.nd.value('local-name(.)','nvarchar(max)')) ORDER BY A.nd),']') AS NVARCHAR(MAX)) COLLATE DATABASE_DEFAULT AS XPath
          ,A.nd.query('.') CurrentFragment
          ,A.nd.query('./*') NextFragment
    FROM @xml.nodes('/*') A(nd)
    LEFT JOIN AllNamespaces ns ON ns.namespaceUri=A.nd.value('namespace-uri(.)','nvarchar(max)')

    UNION ALL

    --Recursion: the child elements of the previous level (taken from NextFragment).
    --NOTE(review): the position numbers rely on window functions in the recursive
    --member being evaluated per parent row - confirm on the target SQL Server version.
    SELECT r.RecursionLevel+1
          ,1
          ,ROW_NUMBER() OVER(ORDER BY A.nd)
          ,CAST(CONCAT(r.SortString,REPLACE(STR(ROW_NUMBER() OVER(ORDER BY A.nd),5),' ','0')) AS VARCHAR(900)) COLLATE DATABASE_DEFAULT
          ,ns.Prefix
          ,ns.namespaceUri
          ,CONCAT(ns.Prefix+':',A.nd.value('local-name(.)','nvarchar(max)')
                 ,'[',ROW_NUMBER() OVER(PARTITION BY CONCAT(ns.Prefix+':',A.nd.value('local-name(.)','nvarchar(max)')) ORDER BY A.nd),']')
          ,CONCAT(r.XPath,'/',ns.Prefix+':',A.nd.value('local-name(.)','nvarchar(max)')
                 ,'[',ROW_NUMBER() OVER(PARTITION BY CONCAT(ns.Prefix+':',A.nd.value('local-name(.)','nvarchar(max)')) ORDER BY A.nd),']')
          ,A.nd.query('.') CurrentFragment
          ,A.nd.query('./*') NextFragment
    FROM recCte r
    CROSS APPLY r.NextFragment.nodes('*') A(nd)
    OUTER APPLY(SELECT Prefix,namespaceUri FROM AllNamespaces ns WHERE ns.namespaceUri=A.nd.value('namespace-uri(.)','nvarchar(max)')) ns
)
,WithValues AS
(
    --One row per direct text() node of each element (NodeType 3, XPath extended by
    --/text()[n]); an element without a non-empty text() node keeps its element row
    --(NodeType 1) with NodeValue NULL.
    SELECT r.RecursionLevel
          ,CASE WHEN LEN(B.NodeValue)>0 THEN 3 ELSE r.NodeType END AS NodeType
          ,r.ElementPosition
          ,CASE WHEN LEN(B.NodeValue)>0
                THEN CONCAT(r.SortString,REPLACE(STR(ROW_NUMBER() OVER(PARTITION BY r.XPath ORDER BY A.txt),5),' ','0'))
                ELSE r.SortString END AS SortString
          ,r.CurrentPrefix
          ,r.CurrentUri
          ,CASE WHEN LEN(B.NodeValue)>0 THEN 'text()' ELSE r.FullName END AS FullName
          ,CASE WHEN LEN(B.NodeValue)>0
                THEN CONCAT(r.XPath,'/text()[',ROW_NUMBER() OVER(PARTITION BY r.XPath ORDER BY A.txt),']')
                ELSE r.XPath END AS XPath
          ,CASE WHEN LEN(B.NodeValue)>0 THEN B.NodeValue ELSE NULL END AS NodeValue
          ,r.CurrentFragment
          ,r.NextFragment
    FROM recCte r
    OUTER APPLY r.CurrentFragment.nodes('*/text()') A(txt)
    OUTER APPLY (SELECT A.txt.value('.','nvarchar(max)')) B(NodeValue)
)
,WithAttributes AS
(
    SELECT RecursionLevel
          ,NodeType
          ,ElementPosition
          ,SortString
          ,CurrentPrefix
          ,CurrentUri
          ,FullName
          ,XPath
          ,NodeValue
          ,CurrentFragment
          ,NextFragment
    FROM WithValues

    UNION ALL

    --Attributes (NodeType 2). They are read from recCte (one row per element) and not
    --from WithValues (one row per text() node); otherwise every attribute of an element
    --with several text() nodes would be returned several times.
    --An attribute without namespace reports the prefix/URI of its element.
    SELECT r.RecursionLevel
          ,2
          ,r.ElementPosition
          ,r.SortString
          ,CASE WHEN ns.Prefix IS NOT NULL THEN ns.Prefix ELSE r.CurrentPrefix END AS CurrentPrefix
          ,CASE WHEN ns.namespaceUri IS NOT NULL THEN ns.namespaceUri ELSE r.CurrentUri END AS CurrentUri
          ,CONCAT('@',ns.Prefix+':',B.AttrName) AS FullName
          ,CONCAT(r.XPath,'/@',ns.Prefix+':',B.AttrName) AS XPath
          ,B.AttrValue AS NodeValue
          ,r.CurrentFragment
          ,r.NextFragment
    FROM recCte r
    CROSS APPLY r.CurrentFragment.nodes('*/@*') A(attr)
    CROSS APPLY (SELECT A.attr.value('local-name(.)','nvarchar(max)') AS AttrName
                       ,A.attr.value('.','nvarchar(max)') AS AttrValue
                       ,A.attr.value('namespace-uri(.)','nvarchar(max)') AS namespaceUri) B
    OUTER APPLY(SELECT Prefix,namespaceUri FROM AllNamespaces ns WHERE ns.namespaceUri=B.namespaceUri) ns
)
SELECT NodeType
      ,CurrentPrefix
      ,CurrentUri
      ,FullName
      ,XPath
      ,NodeValue
FROM WithAttributes
WHERE NodeValue IS NOT NULL
ORDER BY SortString
        ,NodeType   --tie-breakers: SortString alone is not unique,
        ,FullName;  --these two make the row order deterministic
--The result
--(an element's attributes come first, ordered by name, followed by its text() nodes and children)
/*
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| NodeType | CurrentPrefix | CurrentUri | FullName | XPath | NodeValue |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 2 | ns2 | dummy1 | @ns2:test2 | /ns1:root[1]/ns2:level1[1]/@ns2:test2 | test2 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 2 | ns3 | SomeOther | @ns3:test3 | /ns1:root[1]/ns2:level1[1]/@ns3:test3 | test3 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 2 | ns2 | dummy1 | @test1 | /ns1:root[1]/ns2:level1[1]/@test1 | test1 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns3 | SomeOther | text() | /ns1:root[1]/ns2:level1[1]/ns3:InnerElement[1]/text()[1] | Some inner element |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns1 | defaultNs | text() | /ns1:root[1]/ns2:level1[1]/ns1:multiText[1]/text()[1] | text1 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns1 | defaultNs | text() | /ns1:root[1]/ns2:level1[1]/ns1:multiText[1]/ns1:someInner[1]/text()[1] | blah |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns1 | defaultNs | text() | /ns1:root[1]/ns2:level1[1]/ns1:multiText[1]/text()[2] | text2 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns1 | defaultNs | text() | /ns1:root[1]/ns2:level1[1]/ns1:multiText[1]/text()[3] | text3 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns1 | defaultNs | text() | /ns1:root[1]/ns2:level1[1]/ns1:repeating[1]/text()[1] | rep 1 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 2 | ns1 | defaultNs | @r2 | /ns1:root[1]/ns2:level1[1]/ns1:repeating[2]/@r2 | r2 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns1 | defaultNs | text() | /ns1:root[1]/ns2:level1[1]/ns1:repeating[2]/text()[1] | rep 2 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 2 | ns3 | SomeOther | @r4 | /ns1:root[1]/ns2:level1[1]/ns3:repeating[1]/@r4 | r4 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns3 | SomeOther | text() | /ns1:root[1]/ns2:level1[1]/ns3:repeating[1]/text()[1] | rep 4 |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns1 | defaultNs | text() | /ns1:root[1]/ns2:level1[1]/ns1:level2[1]/ns1:level3[2]/ns1:content[1]/text()[1] | Content in second level3 element |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 2 | ns1 | defaultNs | @oneMore | /ns1:root[1]/ns2:level1[1]/ns1:repeating[3]/@oneMore | oneMore |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
| 3 | ns1 | defaultNs | text() | /ns1:root[1]/ns2:level1[1]/ns1:repeating[3]/text()[1] | one more |
+----------+---------------+------------+------------+---------------------------------------------------------------------------------+----------------------------------+
*/
--Cross-check: each generated XPath, used literally, returns the value its alias names.
--Attention: we have to declare our own prefixes - even for the default namespace.
WITH XMLNAMESPACES
(
     'defaultNs' AS ns1
    ,'dummy1'    AS ns2
    ,'SomeOther' AS ns3
)
SELECT
     @xml.value('/ns1:root[1]/ns2:level1[1]/ns1:multiText[1]/ns1:someInner[1]/text()[1]','nvarchar(max)') AS Is_blah
    ,@xml.value('/ns1:root[1]/ns2:level1[1]/ns1:level2[1]/ns1:level3[2]/ns1:content[1]/text()[1]','nvarchar(max)') AS Is_Content_in_second_level3_element
    ,@xml.value('/ns1:root[1]/ns2:level1[1]/ns1:repeating[3]/@oneMore','nvarchar(max)') AS Is_attribute_oneMore
    ,@xml.value('/ns1:root[1]/ns2:level1[1]/ns1:multiText[1]/text()[3]','nvarchar(max)') AS Is_3rd_text_in_multiText;
The idea in short:
- APPLY with .nodes() returns the nested nodes of each element; the recursive CTE repeats this level by level.
- text() nodes are read in addition - if there are any.
- NodeType helps to distinguish between elements (=1), attributes (=2) and text() (=3).
- Use SELECT * ... to see all returned columns.
- Remove WHERE NodeValue IS NOT NULL to see more of the empty structure.