问题
I have the following table in Teradata 14. I am not allowed to write procedures or functions myself, but I can use strtok, strtok_split_to_table, etc.
id property
1 1234X (Yel), 2225Y (Red), 1234X (Gre),
2
3 1222Y (Pin),
4 1134E (Yel), 4565Y (Whi), 1134E (Red), 2222Y (Red),
How can I group the above table so that each object has all of its attributes listed in a single pair of brackets?
id property
1 1234X (Yel Gre), 2225Y (Red),
2
3 1222Y (Pin ),
4 1134E (Yel Red), 4565Y (Whi), 2222Y (Red),
The property code is always a 5-character string, e.g. 1222Y. The color code is always a 3-character string, e.g. Pin.
I tried using this solution but got the error "A column or character expression is larger than max size".
In addition, I tried strtok_split_to_table and was able to create a modified table, but I do not know how to proceed from there.
回答1:
Why do you store denormalized data in a RDBMS and then process it to create even worse denormalized output?
Modifying my solution from the link you posted to utilize STRTOK_SPLIT_TO_TABLE instead of recursion:
-- Regroups each row's comma-separated "code (color)" list so that every
-- property code appears once, followed by all of its colors inside a single
-- pair of brackets, e.g.
--   '1234X (Yel), 2225Y (Red), 1234X (Gre),'  ->  '1234X (Yel Gre), 2225Y (Red),'
-- Reads the columns id and property of table vt (referenced inside the
-- STRTOK_SPLIT_TO_TABLE call).
--
-- Limits: the innermost CASE list covers token positions 1-6 and the outermost
-- one covers the first six groups per id; anything beyond that is dropped until
-- more CASE branches are added.
SELECT
    id,
    -- Slot 1 additionally suppresses the '(),' placeholder that a blank token
    -- produces. The comparison relies on the trailing blank of newgrp being
    -- ignored.
    MAX(CASE WHEN newpos = 1 AND newgrp <> '(),' THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 2 THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 3 THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 4 THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 5 THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 6 THEN newgrp ELSE '' END)
    -- add as many CASEs as needed
FROM (
    -- One row per (id, property code): 'code (color color ...), '.
    SELECT
        id,
        -- Number the groups in order of first appearance in the source string,
        -- so the output keeps the order of the input list instead of being
        -- sorted alphabetically. A blank code is forced into slot 1, which is
        -- the only slot the outer query filters for the '(),' placeholder.
        ROW_NUMBER() OVER (
            PARTITION BY id
            ORDER BY
                CASE WHEN a = '' THEN 0 ELSE 1 END,
                MIN(tokennum)
        ) AS newpos,
        -- Each color is emitted with a trailing blank; TRIM removes the last
        -- one before the closing bracket is appended.
        TRIM(a || ' (' ||
            MAX(CASE WHEN tokennum = 1 THEN b || ' ' ELSE '' END) ||
            MAX(CASE WHEN tokennum = 2 THEN b || ' ' ELSE '' END) ||
            MAX(CASE WHEN tokennum = 3 THEN b || ' ' ELSE '' END) ||
            MAX(CASE WHEN tokennum = 4 THEN b || ' ' ELSE '' END) ||
            MAX(CASE WHEN tokennum = 5 THEN b || ' ' ELSE '' END) ||
            MAX(CASE WHEN tokennum = 6 THEN b || ' ' ELSE '' END)
            -- add as many CASEs as needed
        ) || '), ' AS newgrp
    FROM (
        -- Split every token 'code (color)' into a = code and b = color.
        SELECT
            id,
            tokennum,
            -- Everything before the first '('. The '(' appended to the token
            -- makes a token without a bracket yield the whole token. The
            -- offset is measured on the same untrimmed string that SUBSTRING
            -- cuts, so leading blanks cannot truncate the code.
            TRIM(SUBSTRING(token FROM 1 FOR POSITION('(' IN token || '(') - 1)) AS a,
            -- Everything after the first '(' with the closing ')' removed.
            TRIM(TRAILING ')' FROM SUBSTRING(token FROM POSITION('(' IN token) + 1)) AS b
        FROM TABLE (
            STRTOK_SPLIT_TO_TABLE(vt.id, vt.property, ',')
            RETURNS (
                id INT,
                tokennum INT,
                token VARCHAR(30) CHARACTER SET UNICODE
            )
        ) AS tok
    ) AS parsed
    GROUP BY id, a
) AS grouped
GROUP BY id;
If you have access to the TDStats.udfconcat function, it can be further simplified (but there's no way to control the order of the properties):
-- Variant of the regrouping query that lets TDStats.udfconcat do the string
-- aggregation instead of one MAX(CASE ...) per position, so there is no fixed
-- number of slots to maintain.
-- Reads the columns id and property of table vt (referenced inside the
-- STRTOK_SPLIT_TO_TABLE call).
-- NOTE(review): the order in which udfconcat combines values is not controlled
-- by this query, and its separator is presumably a comma (the inner OTRANSLATE
-- replaces ',' with ' ') -- confirm against the TDStats documentation.
SELECT
    id,
    -- A blank token yields the placeholder '(),'; return an empty string for
    -- it instead.
    CASE
        WHEN TRIM(TDStats.udfconcat(' ' || a || ' ' || b)) || ',' <> '(),'
            THEN TRIM(TDStats.udfconcat(' ' || a || ' ' || b)) || ','
        ELSE ''
    END
FROM (
    -- One row per (id, property code): a = code, b = '(color color ...)'.
    SELECT
        id,
        -- Everything before the first '('. The '(' appended to the token makes
        -- a token without a bracket yield the whole token. The offset is
        -- measured on the same untrimmed string that SUBSTRING cuts, so
        -- leading blanks cannot truncate the code.
        TRIM(SUBSTRING(token FROM 1 FOR POSITION('(' IN token || '(') - 1)) AS a,
        -- Colors of this code: text after '(' without the closing ')',
        -- aggregated by udfconcat, with commas turned into blanks.
        '(' || OTRANSLATE(
            TDStats.udfconcat(TRIM(TRAILING ')' FROM SUBSTRING(token FROM POSITION('(' IN token) + 1))),
            ',',
            ' '
        ) || ')' AS b
    FROM TABLE (
        STRTOK_SPLIT_TO_TABLE(vt.id, vt.property, ',')
        RETURNS (
            id INT,
            tokennum INT,
            token VARCHAR(30) CHARACTER SET UNICODE
        )
    ) AS tok
    GROUP BY id, a
) AS grouped
GROUP BY id;
Most of the work was fiddling with the spaces and commas in the right place to get the requested output.
Still, I would never store data like this in an RDBMS.
回答2:
Try this; I slightly modified dnoeth's query from your post:
-- Regroups each row's comma-separated "code (color)" list from TableA so that
-- every property code appears once with all of its colors in one pair of
-- brackets. The list is split with a recursive CTE instead of a table
-- function.
--
-- cte columns:
--   len       length of the current word (text before the next comma)
--   remaining text still to be split, starting after that comma
--   word      the current trimmed 'code (color)' item
--   pos       1-based position of the word within the row's list
WITH RECURSIVE cte (
    id,
    len,
    remaining,
    word,
    pos
) AS (
    -- Seed: cut the first word off every row. A comma is appended so that the
    -- last item is terminated as well.
    SELECT
        id,
        POSITION(',' IN property || ',') - 1 AS len,
        SUBSTRING(property || ',' FROM len + 2) AS remaining,
        TRIM(SUBSTRING(property FROM 1 FOR len)) AS word,
        1
    FROM TableA
    UNION ALL
    -- Step: cut the next word off the remainder until nothing is left.
    SELECT
        id,
        POSITION(',' IN remaining) - 1 AS len_new,
        SUBSTRING(remaining FROM len_new + 2),
        TRIM(SUBSTRING(remaining FROM 1 FOR len_new)),
        pos + 1
    FROM cte
    WHERE remaining <> ''
)
SELECT
    id,
    MAX(CASE WHEN newpos = 1 THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 2 THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 3 THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 4 THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 5 THEN newgrp ELSE '' END) ||
    MAX(CASE WHEN newpos = 6 THEN newgrp ELSE '' END)
    -- add as many CASEs as needed
FROM (
    -- One row per (id, property code): 'code (color color ...), '.
    SELECT
        id,
        -- Number the groups in order of first appearance in the source string,
        -- so the output keeps the order of the input list instead of being
        -- sorted alphabetically.
        ROW_NUMBER() OVER (
            PARTITION BY id
            ORDER BY MIN(word_pos)
        ) AS newpos,
        -- a keeps the blank that precedes '(' in the word, so the opening
        -- bracket is attached to the first color.
        a ||
        MAX(CASE WHEN color_pos = 1 THEN '(' || b ELSE '' END) ||
        MAX(CASE WHEN color_pos = 2 THEN ' ' || b ELSE '' END) ||
        MAX(CASE WHEN color_pos = 3 THEN ' ' || b ELSE '' END) ||
        MAX(CASE WHEN color_pos = 4 THEN ' ' || b ELSE '' END) ||
        MAX(CASE WHEN color_pos = 5 THEN ' ' || b ELSE '' END) ||
        MAX(CASE WHEN color_pos = 6 THEN ' ' || b ELSE '' END)
        -- add as many CASEs as needed
        || '), ' AS newgrp
    FROM (
        -- Split every word into a = code and b = color.
        SELECT
            id,
            -- Position of the word in the original list; used to order groups.
            pos AS word_pos,
            -- Position of the color within its (id, code) group. Named
            -- differently from cte.pos so the ORDER BY below is unambiguous.
            ROW_NUMBER() OVER (
                PARTITION BY id, a
                ORDER BY pos
            ) AS color_pos,
            SUBSTRING(word FROM 1 FOR POSITION('(' IN word) - 1) AS a,
            TRIM(TRAILING ')' FROM SUBSTRING(word FROM POSITION('(' IN word) + 1)) AS b
        FROM cte
        -- Skip the empty word produced by a trailing comma or a blank list.
        WHERE word <> ''
    ) AS parsed
    GROUP BY id, a
) AS grouped
GROUP BY id
UNION ALL
-- Rows without any property are filtered out above; add them back unchanged.
SELECT id, property FROM TableA WHERE property IS NULL OR TRIM(property)=' ';
来源:https://stackoverflow.com/questions/23021930/how-to-group-substrings-in-teradata-14