I need a query to find column names of a table (table metadata) in Bigquery, like the following query in SQL:
SELECT column_name,data_type,data_length,data_p
Actually it is possible to do so using SQL. To do so you need to query the logging table for the last log of this particular table being created.
For example, assuming the table is loaded/created daily:
CREATE TEMP FUNCTION jsonSchemaStringToArray(jsonSchema String)
RETURNS ARRAY AS ((
SELECT
SPLIT(
REGEXP_REPLACE(REPLACE(LTRIM(jsonSchema,'{ '),'"fields": [',''), r'{[^{]+"name": "([^\"]+)"[^}]+}[, ]*', '\\1,')
,',')
));
WITH valid_schema_columns AS (
WITH array_output aS (SELECT
jsonSchemaStringToArray(jsonSchema) AS column_names
FROM (
SELECT
protoPayload.serviceData.jobInsertRequest.resource.jobConfiguration.load.schemaJson AS jsonSchema
, ROW_NUMBER() OVER (ORDER BY metadata.timestamp DESC) AS record_count
FROM `realself-main.bigquery_logging.cloudaudit_googleapis_com_data_access_20170101`
WHERE
protoPayload.serviceData.jobInsertRequest.resource.jobConfiguration.load.destinationTable.tableId = ''
AND
protoPayload.serviceData.jobInsertRequest.resource.jobConfiguration.load.destinationTable.datasetId = ''
AND
protoPayload.serviceData.jobInsertRequest.resource.jobConfiguration.load.createDisposition = 'CREATE_IF_NEEDED'
) AS t
WHERE
t.record_count = 1 -- grab the latest entry
)
-- this is actually what UNNESTS the array into standard rows
SELECT
valid_column_name
FROM array_output
LEFT JOIN UNNEST(column_names) AS valid_column_name
)