问题
I have a query that ranks rows by partitioning on counter_id, status and source.
This is the query I am using:
-- Asker's ranking attempt (BigQuery Standard SQL).
-- The inner query numbers the rows twice per counter_id; the outer query
-- uses the difference of the two numbers as an extra partition key and
-- numbers the rows again inside each resulting partition.
select ch.* except (seqnum, seqnum_s),
row_number() over (partition by counter_id, status, source, (seqnum_s - seqnum)
order by transaction_time
) as ranking
from (select ch.*,
-- Fixed: this window was written "partition by counter_id order,status by transaction_time",
-- which does not parse; the partition keys must precede "order by".
row_number() over (partition by counter_id, status order by transaction_time) as seqnum,
row_number() over (partition by counter_id, status, source order by transaction_time) as seqnum_s
from counter_history ch
) ch;
but I need a result as given below.
CounterID status transaction_time source Rank
121 new Nov-09-2019 8:32:19 UNDEFINED 1
121 new Nov-09-2019 8:32:20 UNDEFINED 2
121 CLAIMED Nov-09-2019 8:32:21 WORKFLOW 1
121 new Nov-09-2019 8:32:22 WORKFLOW 1
121 CLAIMED Nov-09-2019 8:32:59 external 1
121 CLAIMED Nov-09-2019 8:33:00 external 2
121 CLAIMED Nov-09-2019 8:33:01 external 3
233 new Nov-09-2019 8:33:32 external 1
233 new Nov-09-2019 8:49:58 internal 1
233 RESOLVED Nov-13-2019 6:51:11 internal 1
233 CLAIMED Nov-11-2019 22:15:52 WORKFLOW 1
233 new Nov-11-2019 22:15:54 external 1
233 CLAIMED Nov-11-2019 22:15:55 external 1
156 new Nov-11-2019 22:15:56 UNDEFINED 1
156 CLAIMED Nov-11-2019 22:16:24 WORKFLOW 1
156 CLAIMED Nov-11-2019 22:16:28 external 2
156 RESOLVE Nov-11-2019 22:16:56 external 1
Please help me find what is wrong with the query. I am using BigQuery #standardSQL. Thanks.
回答1:
Below is for BigQuery Standard SQL
#standardSQL
-- Inline sample rows standing in for the real counter_history table.
WITH `project.dataset.counter_history` AS (
  SELECT 121 CounterID, 'new' status, 'Nov-09-2019 8:32:19' transaction_time, 'UNDEFINED' source UNION ALL
  SELECT 121, 'new', 'Nov-09-2019 8:32:20', 'UNDEFINED' UNION ALL
  SELECT 121, 'CLAIMED', 'Nov-09-2019 8:32:21', 'WORKFLOW' UNION ALL
  SELECT 121, 'new', 'Nov-09-2019 8:32:22', 'WORKFLOW' UNION ALL
  SELECT 121, 'CLAIMED', 'Nov-09-2019 8:32:59', 'external' UNION ALL
  SELECT 121, 'CLAIMED', 'Nov-09-2019 8:33:00', 'external' UNION ALL
  SELECT 121, 'CLAIMED', 'Nov-09-2019 8:33:01', 'external' UNION ALL
  SELECT 233, 'new', 'Nov-09-2019 8:33:32', 'external' UNION ALL
  SELECT 233, 'new', 'Nov-09-2019 8:49:58', 'internal' UNION ALL
  SELECT 233, 'RESOLVED', 'Nov-13-2019 6:51:11', 'internal' UNION ALL
  SELECT 233, 'CLAIMED', 'Nov-11-2019 22:15:52', 'WORKFLOW' UNION ALL
  SELECT 233, 'new', 'Nov-11-2019 22:15:54', 'external' UNION ALL
  SELECT 233, 'CLAIMED', 'Nov-11-2019 22:15:55', 'external' UNION ALL
  SELECT 156, 'new', 'Nov-11-2019 22:15:56', 'UNDEFINED' UNION ALL
  SELECT 156, 'CLAIMED', 'Nov-11-2019 22:16:24', 'WORKFLOW' UNION ALL
  SELECT 156, 'CLAIMED', 'Nov-11-2019 22:16:28', 'external' UNION ALL
  SELECT 156, 'RESOLVE', 'Nov-11-2019 22:16:56', 'external'
),
-- Same rows, with the STRING transaction_time parsed into a TIMESTAMP so that
-- the window ORDER BY clauses below sort chronologically.
temp AS (
  SELECT * REPLACE(PARSE_TIMESTAMP('%b-%d-%Y %T', transaction_time) AS transaction_time)
  FROM `project.dataset.counter_history`
),
-- flag compares each row's (status, source) pair with the previous row of the
-- same CounterID; the first row of a CounterID has no previous row to compare to.
flagged AS (
  SELECT
    *,
    (status, source) != LAG((status, source)) OVER (PARTITION BY CounterID ORDER BY transaction_time) AS flag
  FROM temp
),
-- grp is the running count of TRUE flags within a CounterID, so consecutive
-- rows with an unchanged (status, source) pair share the same grp value.
grouped AS (
  SELECT
    *,
    COUNTIF(flag) OVER (PARTITION BY CounterID ORDER BY transaction_time) AS grp
  FROM flagged
)
-- Number the rows inside each (CounterID, grp) run and drop the helper columns.
SELECT
  * EXCEPT (flag, grp),
  ROW_NUMBER() OVER (PARTITION BY CounterID, grp ORDER BY transaction_time) AS ranking
FROM grouped
-- Optional: uncomment to return the rows in a fixed order.
-- ORDER BY CounterID, transaction_time
As you can see, the above also takes care of converting transaction_time
from its STRING representation to a TIMESTAMP, so the ordering is done properly. Without this conversion the output will be wrong!
回答2:
It looks like you are starting over again for each counterid/source/status combination. For that, tweak the query like this:
-- Ranking via the difference of two row numbers.
-- seqnum   : position of the row within its counter_id.
-- seqnum_s : position of the row within its counter_id/status/source combination.
-- (seqnum_s - seqnum) is used as an extra partition key for the final numbering.
WITH numbered AS (
  SELECT
    ch.*,
    ROW_NUMBER() OVER (PARTITION BY counter_id ORDER BY transaction_time) AS seqnum,
    ROW_NUMBER() OVER (PARTITION BY counter_id, status, source ORDER BY transaction_time) AS seqnum_s
  FROM counter_history ch
)
SELECT
  numbered.* EXCEPT (seqnum, seqnum_s),
  ROW_NUMBER() OVER (
    PARTITION BY counter_id, status, source, (seqnum_s - seqnum)
    ORDER BY transaction_time
  ) AS ranking
FROM numbered;
The only difference from your query is that `status` and `source` are in the second `row_number()` and the first still counts only by `counter_id`.
来源:https://stackoverflow.com/questions/59416933/need-partition-by-three-field-and-ranking