BigQuery UDF memory exceeded error on multiple rows but works fine on single row

前端未结

关注

 3  939

小蘑菇 2020-12-20 03:13

I'm writing a UDF to process Google Analytics data, and getting the "UDF out of memory" error message when I try to process multiple rows. I downloaded the raw data and f…

3条回答

遥遥无期 (楼主)

2020-12-20 03:49

I love the concept of parsing my logs in BigQuery, but I've got the same problem, I get

Error: Resources exceeded during query execution.

The Job Id is bigquery-looker:bquijob_260be029_153dd96cfdb, if that at all helps.

I wrote a very basic parser that does a simple match and returns rows. It works just fine on a 10K-row data set, but I run out of resources when trying to run it against a 3M-row log file.

Any suggestions for a workaround?

Here is the javascript code.

/**
 * BigQuery UDF body: parses one log line into structured fields.
 *
 * The input columns `logrow`, `l2` and `l3` are joined with single spaces
 * (absent pieces are skipped) and matched against the layout
 *
 *   YYYY-MM-DD HH:MM:SS.mmm +ZZZZ [category|user|db] :: message
 *
 * Exactly one record is emitted per call:
 *   - on a match: `ts` (epoch seconds, fractional), `category`, `user`,
 *     `db`, `message` (empty captures become null) and `found: true`;
 *   - on a non-empty line that does not match: the whole line in `message`
 *     and `found: false`;
 *   - on an empty line: every field null and `found: false`.
 *
 * @param {Object} row - Input row; reads `row.logrow`, `row.l2`, `row.l3`.
 * @param {function(Object): void} emit - Receives the output record.
 */
function parseLogRow(row, emit) {
  // All working values are declared locally so nothing is written to the
  // global scope that successive invocations share.
  var line =
    (row.logrow ? row.logrow : "") +
    // `!= null` rejects both undefined and NULL columns; a bare typeof check
    // would let NULL through and append the literal text " null".
    (row.l2 != null ? " " + row.l2 : "") +
    (row.l3 ? " " + row.l3 : "");

  var record = {
    ts: null,
    category: null,
    user: null,
    db: null,
    message: null,
    found: false
  };

  if (line) {
    var m = line.match(/^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d\d\d (\+|\-)\d\d\d\d) \[([^|]*)\|([^|]*)\|([^\]]*)\] :: (.*)/);
    if (m) {
      // Rewrite "YYYY-MM-DD HH:MM:SS.mmm +ZZZZ" as ISO 8601
      // ("YYYY-MM-DDTHH:MM:SS.mmm+ZZ:ZZ"); Date parsing of any other layout
      // is implementation-dependent.
      var isoTimestamp = m[1].replace(/^(\S+) (\S+) ([+-]\d\d)(\d\d)$/, "$1T$2$3:$4");
      var seconds = new Date(isoTimestamp).getTime() / 1000;

      // An impossible date (e.g. month 13) yields NaN; emit null instead.
      record.ts = isNaN(seconds) ? null : seconds;
      record.category = m[3] || null;
      record.user = m[4] || null;
      record.db = m[5] || null;
      record.message = m[6] || null;
      record.found = true;
    } else {
      // Keep unparsed lines so they can still be inspected downstream.
      record.message = line;
    }
  }

  emit(record);
}

// Register the UDF with BigQuery: SQL-visible name, the input columns it
// reads, the schema of each emitted record, and the implementing function.
bigquery.defineFunction(
  'parseLogRow',
  ['logrow', 'l2', 'l3'],
  [
    { name: 'ts', type: 'timestamp' },
    { name: 'category', type: 'string' },
    { name: 'user', type: 'string' },
    { name: 'db', type: 'string' },
    { name: 'message', type: 'string' },
    { name: 'found', type: 'boolean' }
  ],
  parseLogRow
);

0 讨论(0)

查看其它3个回答