问题
I have a master-slave table structure: Col1 identifies a user and Col2 identifies that user's master.
I'm creating a query that builds an array which represents the entire hierarchy (my master, the master of my master, ...) and also in the opposite direction (my slaves, the slaves of my slaves, ...)
-- Lists every user together with the chain of masters above them and the
-- chain of slaves below them, found by self-joining the table a fixed number
-- of times. Depth is bounded by the number of joins: 5 levels up, 4 levels down.
-- Each downward LEFT JOIN can match several rows (a user may have several
-- slaves), so a user is returned once per downward path.
WITH sample_data AS (
    SELECT "user1" AS name, "user2" AS handler
    UNION ALL
    SELECT "user2" AS name, "user3" AS handler
    UNION ALL
    SELECT "user3" AS name, "user4" AS handler
    UNION ALL
    SELECT "user5" AS name, "user1" AS handler
    UNION ALL
    SELECT "user6" AS name, "user1" AS handler
    UNION ALL
    SELECT "user7" AS name, "user8" AS handler
    UNION ALL
    SELECT "user8" AS name, "user9" AS handler
)
SELECT
    level1.name,
    -- Nearest master first; NULLs left by missing levels are dropped.
    ARRAY(
        SELECT a
        FROM UNNEST([
            level1.handler,
            up_level2.handler,
            up_level3.handler,
            up_level4.handler,
            up_level5.handler
        ]) AS a WITH OFFSET AS depth
        WHERE a IS NOT NULL
        ORDER BY depth
    ) AS masters,
    -- A slave is the `name` of the row joined at each downward level.
    -- Reading `handler` of the next level instead would only repeat the
    -- previous level's name, and only when a deeper level exists, so the
    -- last slave on every path would be lost.
    ARRAY(
        SELECT a
        FROM UNNEST([
            down_level2.name,
            down_level3.name,
            down_level4.name,
            down_level5.name
        ]) AS a WITH OFFSET AS depth
        WHERE a IS NOT NULL
        ORDER BY depth
    ) AS slaves
FROM sample_data AS level1
-- Upward direction: each join follows `handler` to that handler's own row.
LEFT JOIN sample_data AS up_level2
    ON level1.handler = up_level2.name
LEFT JOIN sample_data AS up_level3
    ON up_level2.handler = up_level3.name
LEFT JOIN sample_data AS up_level4
    ON up_level3.handler = up_level4.name
LEFT JOIN sample_data AS up_level5
    ON up_level4.handler = up_level5.name
-- Downward direction: each join finds rows whose `handler` is the previous level.
LEFT JOIN sample_data AS down_level2
    ON level1.name = down_level2.handler
LEFT JOIN sample_data AS down_level3
    ON down_level2.name = down_level3.handler
LEFT JOIN sample_data AS down_level4
    ON down_level3.name = down_level4.handler
LEFT JOIN sample_data AS down_level5
    ON down_level4.name = down_level5.handler
This is the first approach I thought of: repeatedly joining the table to itself in order to follow the path, and then constructing an array from all the items found. It works, but the depth is fixed (in this case, up to level 5). Is there a way to rewrite the query so that the depth of the hierarchy is not limited?
回答1:
I spent some time to make it happen, and it's finally ready :)
To use it, you should change main_data to your source table.
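In short: it first finds the level-1 records (users that are nobody's handler). Each following pass of the loop then adds the next level (users whose direct reports have all been placed already), and so on, one level per pass. Users that appear only as a handler are added at the very end. In the result, `handlers` holds the chain of users above a user and `masters` holds the users below it.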
I used a loop to walk the hierarchy level by level. In case I made a mistake in the code, I added a safety variable, max_loops: the loop stops after that many iterations (10 here) even if it has not finished. You can change that value at the top of the script.
If the code is not clear, please let me know, so I can help.
-- Loop bookkeeping: passes completed so far, and a hard cap on passes.
DECLARE nth_loop INT64 DEFAULT 0;
DECLARE max_loops INT64 DEFAULT 10;
-- Number of distinct `name` values in main_data (set below).
DECLARE unique_masters INT64;

-- Sample (name, handler) pairs; swap this for the real source table.
CREATE TEMP TABLE main_data AS
SELECT
    pair.name,
    pair.handler
FROM UNNEST(ARRAY<STRUCT<name STRING, handler STRING>>[
    ('user1', 'user2'),
    ('user2', 'user3'),
    ('user3', 'user4'),
    ('user5', 'user1'),
    ('user6', 'user1'),
    ('user7', 'user8'),
    ('user8', 'user9')
]) AS pair;

SET unique_masters = (
    SELECT COUNT(DISTINCT name)
    FROM main_data
);

-- Result table, created empty here so later statements can read from it.
CREATE TEMP TABLE output (
    name     STRING,
    handlers ARRAY<STRING>,
    masters  ARRAY<STRING>,
    level    INT64
);
-- Rebuilds output once per pass. Each pass adds the pending rows that no
-- other pending row names as its handler, and appends the handlers of those
-- newly added rows to the `handlers` chain of rows added in earlier passes.
-- Ends when output has one row per distinct name, or after max_loops passes.
WHILE nth_loop < max_loops AND (SELECT COUNT(*) FROM output) < unique_masters DO
    SET nth_loop = nth_loop + 1;

    CREATE OR REPLACE TEMP TABLE output AS
    WITH
        -- Source rows whose name is not in output yet.
        pending AS (
            SELECT
                src.name,
                src.handler
            FROM main_data AS src
            WHERE NOT EXISTS (
                SELECT 1
                FROM output AS placed
                WHERE placed.name = src.name
            )
        ),
        -- Pending rows that no other pending row points to as its handler.
        ready AS (
            SELECT
                candidate.name,
                candidate.handler
            FROM pending AS candidate
            WHERE NOT EXISTS (
                SELECT 1
                FROM pending AS report
                WHERE report.handler = candidate.name
            )
        ),
        -- One (handler, row name) pair per element of every placed row's
        -- handlers array, exposed as (name, master).
        chain_links AS (
            SELECT
                link AS name,
                placed.name AS master
            FROM output AS placed
            CROSS JOIN UNNEST(placed.handlers) AS link
        )
    -- Rows added in this pass: level is one more than the highest so far.
    SELECT
        ready.name,
        [ready.handler] AS handlers,
        ARRAY_AGG(chain_links.master IGNORE NULLS) AS masters,
        COALESCE((SELECT MAX(level) FROM output), 0) + 1 AS level
    FROM ready
    LEFT JOIN chain_links
        ON chain_links.name = ready.name
    GROUP BY ready.name, ready.handler

    UNION ALL

    -- Rows added in earlier passes: keep them, and extend `handlers` with the
    -- handler of any newly added row already present in their chain.
    SELECT
        placed.name,
        ANY_VALUE(placed.handlers)
            || COALESCE(ARRAY_AGG(DISTINCT ready.handler IGNORE NULLS), ARRAY<STRING>[]) AS handlers,
        ANY_VALUE(placed.masters) AS masters,
        placed.level AS level
    FROM output AS placed
    LEFT JOIN UNNEST(placed.handlers) AS link
    LEFT JOIN ready
        ON ready.name = link
    GROUP BY placed.name, placed.level

    ORDER BY level, name;
END WHILE;
-- Adds the handlers that never appear as a `name` in main_data (the top of
-- each hierarchy). They have no source row of their own, so the loop above
-- never places them.
--
-- main_data may hold several rows pointing at the same top-level handler.
-- The rows are grouped so that handler is inserted exactly once, with the
-- lists of all its direct reports merged; without the grouping it would be
-- inserted once per direct report, each copy carrying only part of the list.
INSERT INTO output (name, handlers, masters, level)
WITH
    -- (top-level handler, direct report) pairs.
    top_links AS (
        SELECT
            child.handler AS name,
            child.name AS report
        FROM main_data AS child
        LEFT JOIN main_data AS parent
            ON child.handler = parent.name
        WHERE parent.name IS NULL
    )
SELECT
    top_links.name,
    ARRAY<STRING>[] AS handlers,
    -- Each direct report contributes its own masters list plus itself.
    ARRAY_CONCAT_AGG(placed.masters || [top_links.report] ORDER BY top_links.report) AS masters,
    -- One level above the highest-placed direct report.
    MAX(placed.level) + 1 AS level
FROM top_links
INNER JOIN output AS placed
    ON placed.name = top_links.report
GROUP BY top_links.name
;
-- Return the finished hierarchy table, sorted by name.
SELECT
    name,
    handlers,
    masters,
    level
FROM output
ORDER BY name;
来源:https://stackoverflow.com/questions/61234264/bigquery-construct-hierarchy-array-from-key-master-slave