BigQuery has facilities to parse JSON in real-time interactive queries: Just store the JSON encoded object as a string, and query in real time, with functions like JSON_EXTR
This is what I came up with (Specifically for StandardSQL).. Not sure if accumulating in a list is the best method... Also.. I simplified for my case where I'm just concerned with keys.
CREATE TEMPORARY FUNCTION Foo(infoo STRING)
RETURNS Array
LANGUAGE js AS """
blah = [];
function processKey(node, parent) {
if (parent !== '') {parent += '.'};
Object.keys(node).forEach(function(key) {
value = node[key].toString();
if (value !== '[object Object]') {
blah.push(parent+key)
} else {
processKey(node[key], parent + key);
};
});
};
try {
x = JSON.parse(infoo);
processKey(x,'');
return blah;
} catch (e) { return null }
"""
OPTIONS ();
WITH x as(
select Foo(jsonfield) as bbb from clickstream.clikcs
)
select distinct arr_item from (SELECT arr_item FROM x, UNNEST(bbb) as arr_item)