Hopefully I\'m missing a simple solution to this.
I have two tables. One contains a list of companies. The second contains a list of publishers. The mapping between
This is a recursive solution, using XML:
with a as ( -- recursive result, containing shorter subsets and duplicates
select cast('' + company + ' ' as xml) as companies
,cast('' + publisher + '
' as xml) as publishers
from Table1
union all
select a.companies.query('for $c in distinct-values((for $i in /c return string($i),
sql:column("t.company")))
order by $c
return {$c} ')
,a.publishers.query('for $p in distinct-values((for $i in /p return string($i),
sql:column("t.publisher")))
order by $p
return {$p}
')
from a join Table1 t
on ( a.companies.exist('/c[text() = sql:column("t.company")]') = 0
or a.publishers.exist('/p[text() = sql:column("t.publisher")]') = 0)
and ( a.companies.exist('/c[text() = sql:column("t.company")]') = 1
or a.publishers.exist('/p[text() = sql:column("t.publisher")]') = 1)
), b as ( -- remove the shorter versions from earlier steps of the recursion and the duplicates
select distinct -- distinct cannot work on xml types, hence cast to nvarchar
cast(companies as nvarchar) as companies
,cast(publishers as nvarchar) as publishers
,DENSE_RANK() over(order by cast(companies as nvarchar), cast(publishers as nvarchar)) as groupid
from a
where not exists (select 1 from a as s -- s is a proper subset of a
where (cast('' + cast(s.companies as varchar)
+ '' + cast(a.companies as varchar) + '' as xml)
).value('if((count(/s/c) > count(/a/c))
and (some $s in /s/c/text() satisfies
(some $a in /a/c/text() satisfies $s = $a))
) then 1 else 0', 'int') = 1
)
and not exists (select 1 from a as s -- s is a proper subset of a
where (cast('' + cast(s.publishers as nvarchar)
+ '' + cast(a.publishers as nvarchar) + '' as xml)
).value('if((count(/s/p) > count(/a/p))
and (some $s in /s/p/text() satisfies
(some $a in /a/p/text() satisfies $s = $a))
) then 1 else 0', 'int') = 1
)
), c as ( -- cast back to xml
select cast(companies as xml) as companies
,cast(publishers as xml) as publishers
,groupid
from b
)
select Co.company.value('(./text())[1]', 'varchar') as company
,Pu.publisher.value('(./text())[1]', 'varchar') as publisher
,c.groupid
from c
cross apply companies.nodes('/c') as Co(company)
cross apply publishers.nodes('/p') as Pu(publisher)
where exists(select 1 from Table1 t -- restrict to only the combinations that exist in the source
where t.company = Co.company.value('(./text())[1]', 'varchar')
and t.publisher = Pu.publisher.value('(./text())[1]', 'varchar')
)
The set of companies and the set of publishers are kept in XML fields in the intermediate steps, and there is some casting between xml and nvarchar necessary due to some limitations of SQL Server (like not being able to group or use distinct on XML columns.