BigQuery UDF Internal Error

纵饮孤独 提交于 2019-12-04 05:03:32

问题


We had a simple UDF in BigQuery that somehow throws an error that keeps returning

Query Failed
Error: An internal error occurred and the request could not be completed.

The query was simply trying to use UDF to perform a SHA256.

SELECT
  input AS title,
  input_sha256 AS title_sha256
FROM
  SHA256(
      SELECT
        title AS input
      FROM
        [bigquery-public-data:hacker_news.stories]
      GROUP BY
        input 
  )
LIMIT
  1000

The in-line UDF is pasted below. However I can not post the full UDF as StackOverflow complaints too much code in the post. The full UDF can be seen this gist.

function sha256(row, emit) {
  emit(
      {
        input: row.input,
        input_sha256: CryptoJS.SHA256(row.input).toString(CryptoJS.enc.Hex)
      }
  );
}

bigquery.defineFunction(
  'SHA256',                           // Name of the function exported to SQL
  ['input'],                    // Names of input columns
  [
      {'name': 'input', 'type': 'string'},
      {'name': 'input_sha256', 'type': 'string'}
  ],
  sha256                       // Reference to JavaScript UDF
);

Not sure if it helps, but the Job-ID is

bigquery:bquijob_7fd3b51c_153c058dc7c

Looks like there is a similar issue at:

https://code.google.com/p/google-bigquery/issues/detail?id=478

回答1:


Short answer - this is an issue related to memory allocation that I uncovered via my own testing and fixed today, but it will take a little while to flow out to production.

Slightly longer answer - we just rolled out a fix today for an issue where users who were having "out of memory" issues when scaling up their UDFs over larger number of rows, even though the UDF would succeed on smaller numbers of rows. The queries that were hitting that condition are now running fine on our internal / test trees. However, since public BigQuery hosts have much higher traffic loads, the JavaScript engine that executes the UDFs (V8) behaves somewhat differently in production than it does in internal trees. Specifically, there's a new memory allocation error that some of the previously OOMing jobs are now hitting that we couldn't observe until the queries ran on a fully-loaded tree.

It's a minor error with a quick fix, but we'd ideally let it flow through our regular testing and QA cycle. This should put the fix in production in about a week, assuming nothing else goes wrong with the candidate. Would that be acceptable for you?




回答2:


i am re-using answer box to provide full query text. it works if uncomment LIMIT 40

SELECT input, input_sha256 FROM JS(
(
  SELECT title AS input
  FROM [bigquery-public-data:hacker_news.stories]
  GROUP BY input
  //LIMIT 40 
),
input,
"[ {'name': 'input', 'type': 'string'}, {'name': 'input_sha256', 'type': 'string'} ] ",

"function(row, emit) { 
  var CryptoJS=CryptoJS||function(h,s){var f={},g=f.lib={},q=function(){},m=g.Base={extend:function(a){q.prototype=this;var c=new q;a&&c.mixIn(a);c.hasOwnProperty('init')||(c.init=function(){c.$super.init.apply(this,arguments)});c.init.prototype=c;c.$super=this;return c},create:function(){var a=this.extend();a.init.apply(a,arguments);return a},init:function(){},mixIn:function(a){for(var c in a)a.hasOwnProperty(c)&&(this[c]=a[c]);a.hasOwnProperty('toString')&&(this.toString=a.toString)},clone:function(){return this.init.prototype.extend(this)}}, r=g.WordArray=m.extend({init:function(a,c){a=this.words=a||[];this.sigBytes=c!=s?c:4*a.length},toString:function(a){return(a||k).stringify(this)},concat:function(a){var c=this.words,d=a.words,b=this.sigBytes;a=a.sigBytes;this.clamp();if(b%4)for(var e=0;e<a;e++)c[b+e>>>2]|=(d[e>>>2]>>>24-8*(e%4)&255)<<24-8*((b+e)%4);else if(65535<d.length)for(e=0;e<a;e+=4)c[b+e>>>2]=d[e>>>2];else c.push.apply(c,d);this.sigBytes+=a;return this},clamp:function(){var a=this.words,c=this.sigBytes;a[c>>>2]&=4294967295<< 32-8*(c%4);a.length=h.ceil(c/4)},clone:function(){var a=m.clone.call(this);a.words=this.words.slice(0);return a},random:function(a){for(var c=[],d=0;d<a;d+=4)c.push(4294967296*h.random()|0);return new r.init(c,a)}}),l=f.enc={},k=l.Hex={stringify:function(a){var c=a.words;a=a.sigBytes;for(var d=[],b=0;b<a;b++){var e=c[b>>>2]>>>24-8*(b%4)&255;d.push((e>>>4).toString(16));d.push((e&15).toString(16))}return d.join('')},parse:function(a){for(var c=a.length,d=[],b=0;b<c;b+=2)d[b>>>3]|=parseInt(a.substr(b, 2),16)<<24-4*(b%8);return new r.init(d,c/2)}},n=l.Latin1={stringify:function(a){var c=a.words;a=a.sigBytes;for(var d=[],b=0;b<a;b++)d.push(String.fromCharCode(c[b>>>2]>>>24-8*(b%4)&255));return d.join('')},parse:function(a){for(var c=a.length,d=[],b=0;b<c;b++)d[b>>>2]|=(a.charCodeAt(b)&255)<<24-8*(b%4);return new r.init(d,c)}},j=l.Utf8={stringify:function(a){try{return decodeURIComponent(escape(n.stringify(a)))}catch(c){throw Error('Malformed UTF-8 data');}},parse:function(a){return n.parse(unescape(encodeURIComponent(a)))}}, u=g.BufferedBlockAlgorithm=m.extend({reset:function(){this._data=new r.init;this._nDataBytes=0},_append:function(a){'string'==typeof a&&(a=j.parse(a));this._data.concat(a);this._nDataBytes+=a.sigBytes},_process:function(a){var c=this._data,d=c.words,b=c.sigBytes,e=this.blockSize,f=b/(4*e),f=a?h.ceil(f):h.max((f|0)-this._minBufferSize,0);a=f*e;b=h.min(4*a,b);if(a){for(var g=0;g<a;g+=e)this._doProcessBlock(d,g);g=d.splice(0,a);c.sigBytes-=b}return new r.init(g,b)},clone:function(){var a=m.clone.call(this); a._data=this._data.clone();return a},_minBufferSize:0});g.Hasher=u.extend({cfg:m.extend(),init:function(a){this.cfg=this.cfg.extend(a);this.reset()},reset:function(){u.reset.call(this);this._doReset()},update:function(a){this._append(a);this._process();return this},finalize:function(a){a&&this._append(a);return this._doFinalize()},blockSize:16,_createHelper:function(a){return function(c,d){return(new a.init(d)).finalize(c)}},_createHmacHelper:function(a){return function(c,d){return(new t.HMAC.init(a, d)).finalize(c)}}});var t=f.algo={};return f}(Math); 
  (function(h){for(var s=CryptoJS,f=s.lib,g=f.WordArray,q=f.Hasher,f=s.algo,m=[],r=[],l=function(a){return 4294967296*(a-(a|0))|0},k=2,n=0;64>n;){var j;a:{j=k;for(var u=h.sqrt(j),t=2;t<=u;t++)if(!(j%t)){j=!1;break a}j=!0}j&&(8>n&&(m[n]=l(h.pow(k,0.5))),r[n]=l(h.pow(k,1/3)),n++);k++}var a=[],f=f.SHA256=q.extend({_doReset:function(){this._hash=new g.init(m.slice(0))},_doProcessBlock:function(c,d){for(var b=this._hash.words,e=b[0],f=b[1],g=b[2],j=b[3],h=b[4],m=b[5],n=b[6],q=b[7],p=0;64>p;p++){if(16>p)a[p]= c[d+p]|0;else{var k=a[p-15],l=a[p-2];a[p]=((k<<25|k>>>7)^(k<<14|k>>>18)^k>>>3)+a[p-7]+((l<<15|l>>>17)^(l<<13|l>>>19)^l>>>10)+a[p-16]}k=q+((h<<26|h>>>6)^(h<<21|h>>>11)^(h<<7|h>>>25))+(h&m^~h&n)+r[p]+a[p];l=((e<<30|e>>>2)^(e<<19|e>>>13)^(e<<10|e>>>22))+(e&f^e&g^f&g);q=n;n=m;m=h;h=j+k|0;j=g;g=f;f=e;e=k+l|0}b[0]=b[0]+e|0;b[1]=b[1]+f|0;b[2]=b[2]+g|0;b[3]=b[3]+j|0;b[4]=b[4]+h|0;b[5]=b[5]+m|0;b[6]=b[6]+n|0;b[7]=b[7]+q|0},_doFinalize:function(){var a=this._data,d=a.words,b=8*this._nDataBytes,e=8*a.sigBytes; d[e>>>5]|=128<<24-e%32;d[(e+64>>>9<<4)+14]=h.floor(b/4294967296);d[(e+64>>>9<<4)+15]=b;a.sigBytes=4*d.length;this._process();return this._hash},clone:function(){var a=q.clone.call(this);a._hash=this._hash.clone();return a}});s.SHA256=q._createHelper(f);s.HmacSHA256=q._createHmacHelper(f)})(Math); 
  (function(){var h=CryptoJS,j=h.lib.WordArray;h.enc.Base64={stringify:function(b){var e=b.words,f=b.sigBytes,c=this._map;b.clamp();b=[];for(var a=0;a<f;a+=3)for(var d=(e[a>>>2]>>>24-8*(a%4)&255)<<16|(e[a+1>>>2]>>>24-8*((a+1)%4)&255)<<8|e[a+2>>>2]>>>24-8*((a+2)%4)&255,g=0;4>g&&a+0.75*g<f;g++)b.push(c.charAt(d>>>6*(3-g)&63));if(e=c.charAt(64))for(;b.length%4;)b.push(e);return b.join('')},parse:function(b){var e=b.length,f=this._map,c=f.charAt(64);c&&(c=b.indexOf(c),-1!=c&&(e=c));for(var c=[],a=0,d=0;d< e;d++)if(d%4){var g=f.indexOf(b.charAt(d-1))<<2*(d%4),h=f.indexOf(b.charAt(d))>>>6-2*(d%4);c[a>>>2]|=(g|h)<<24-8*(a%4);a++}return j.create(c,a)},_map:'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='}})(); 
  emit( { input: row.input, input_sha256: CryptoJS.SHA256(row.input).toString(CryptoJS.enc.Hex) } ); 
}" 
)


来源:https://stackoverflow.com/questions/36274752/bigquery-udf-internal-error

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!