问题
I am joining two tables on the closest timestamp in BigQuery and getting this error: "Unsupported subquery with table in join predicate." This is the query:
-- Query as posted in the question. It is the one that fails with
-- "Unsupported subquery with table in join predicate": the second ON
-- condition compares against a subquery that reads a table.
SELECT
    gs.user_session_id,
    dtc._date,
    dtc.hit_timestamp AS _timestamp,
    dtc.user_id
FROM ga.2_deduped_twice_click_data AS dtc
LEFT JOIN ga.sessions AS gs
    ON dtc.user_id = gs.user_id
    AND dtc.hit_timestamp = (
        SELECT dtc2.hit_timestamp
        FROM ga.2_deduped_twice_click_data AS dtc2
        ORDER BY ABS(TIMESTAMP_DIFF(dtc.hit_timestamp, gs._timestamp, MILLISECOND))
        LIMIT 1
    )
回答1:
You could use a correlated subquery that combines SELECT AS STRUCT with ORDER BY diff LIMIT 1 (ARRAY_AGG with ORDER BY diff LIMIT 1 is a similar alternative):
-- Self-contained example: for every row of b, find the row of a whose
-- timestamp is closest (earlier or later) to b.time.

-- a: sample rows to search in, each with a timestamp and a label.
WITH a AS (
    SELECT *
    FROM UNNEST([
        -- Every timestamp is built explicitly instead of relying on implicit
        -- STRING -> TIMESTAMP coercion from the first array element.
        STRUCT(TIMESTAMP('2018-01-02 20:01:00') AS time, 'monkey' AS animal),
        STRUCT(TIMESTAMP('2018-03-04 10:10:10'), 'lion'),
        STRUCT(TIMESTAMP('2018-07-04 10:10:10'), 'donkey')
    ])
),

-- b: sample rows that each need their closest match from a.
b AS (
    SELECT *
    FROM UNNEST([
        STRUCT(TIMESTAMP('2017-01-02 10:01:00') AS time, 'one' AS festival),
        STRUCT(TIMESTAMP('2019-03-04 10:10:10'), 'two'),
        STRUCT(TIMESTAMP('2018-07-04 10:10:10'), 'three'),
        STRUCT(TIMESTAMP('2018-03-05 10:10:10'), 'four')
    ])
)

SELECT
    b,
    -- Correlated scalar subquery in the SELECT list (not in a join predicate):
    -- rank the rows of a by absolute distance to this b row and keep the
    -- nearest one as a STRUCT with fields (time, animal).
    (
        SELECT AS STRUCT
            a.time,
            a.animal
        FROM a
        ORDER BY
            ABS(TIMESTAMP_DIFF(b.time, a.time, SECOND)),
            -- Tie-breaker: when two rows of a are equally distant, pick the
            -- earlier one so the result is deterministic.
            a.time
        LIMIT 1
    ) AS closest
FROM b
来源:https://stackoverflow.com/questions/58170852/bigquery-join-based-on-closest-timestamp