Node.js Streams - Help find my memory leak


Question


So I have a process that selects from a table. I partition my select programmatically into 20 sub-selects, then go through each one of those selects and stream its data to an indexing client (Solr). With every select, memory jumps up and holds until I get an OOM.
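
For illustration only, the kind of partitioning I mean looks roughly like this (MY_TABLE, ID and the modulo predicate are placeholders, not my real schema or partitioning logic):

// Illustration only: build 20 sub-selects by partitioning on a numeric key.
const PARTITIONS = 20
const partitionedQueries = []
for (let i = 0; i < PARTITIONS; i++) {
  partitionedQueries.push(`SELECT * FROM MY_TABLE WHERE MOD(ID, ${PARTITIONS}) = ${i}`)
}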

I logged when each query went off, which can be seen in the following charts:

These correlate with each jump in this memory graph:

14 of the 20 queries ran before I hit the OOM.

I see the same behavior with similar code that runs a delta every 15 minutes. Every delta holds on to some memory until the server eventually crashes with an OOM (from which it recovers).

I have tried to track down the delta issue in the past but gave up and just built a way to restart gracefully. What am I missing here?

Here is my entire code chain that makes this work... I know it's a lot to look through, but I figured as much detail as possible would help.

Library Stack:

"node": "~11.10.1"
"knex": "^0.20.9",
"oracledb": "^4.0.0"
"camelize2": "^1.0.0"

Knex - DB connection factory

'use strict'

const objection = require('objection')
const knex = require('knex')


module.exports = function ObjectionFactory(log) {
  class MyObjection extends objection.Model {
    constructor() {
      super()
    }
    static get tableName() {
      return ''
    }
  }

  MyObjection.pickJsonSchemaProperties = true

  log.info('Connecting to Oracle Pluggable...', {
    host: 'myHost',
    username: 'myUser',
    database: 'myDatabase'
  })

  const knexInstance = knex({
    client: 'oracledb',
    connection: 'connectionInfo',
    pool: {
      min: 0,
      max: 10
    },
    acquireConnectionTimeout: 10000
  })

  process.once('SIGINT', () => {
    log.info('Disconnecting from Oracle Pluggable.')
    knexInstance.destroy()
      .then(() => process.exit(0))
      .catch(() => process.exit(1))
  })
  // Shut down cleanly for nodemon
  process.once('SIGUSR2', () => {
    log.info('Disconnecting from Oracle Pluggable')
    knexInstance.destroy()
      .then(() => process.kill(process.pid, 'SIGUSR2'))
      .catch(() => process.kill(process.pid, 'SIGUSR2'))
  })

  const knexBoundClass = MyObjection.bindKnex(knexInstance)
  knexBoundClass.tag = 'Oracle Connection'
  return knexBoundClass
}
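
For completeness, this factory is consumed roughly like so (the require path and the console logger below are stand-ins, not my actual wiring):

// Rough usage sketch; './objection-factory' and the logger are placeholders.
const log = { info: console.log, error: console.error }
const MyObjection = require('./objection-factory')(log)
// MyObjection is an Objection model class bound to the knex instance above.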

My Select Stream Code:

const oracledb = require('oracledb') // needed for oracledb.OBJECT below

module.exports = function oracleStream(log, MyObjection) {

  const knex = MyObjection.knex()
  const fetchArraySize = 10000
  const outFormat = oracledb.OBJECT

  return {
    selectStream
  }

  async function selectStream(sql, bindings = [], fetchSize = fetchArraySize) {
    let connection = await knex.client.acquireConnection()

    log.info(`Fetch size is set to ${fetchSize}`)
    let select = connection.queryStream(sql, bindings, {
      fetchArraySize: fetchSize,
      outFormat: outFormat
    })

    select.on('error', (err) => {
      log.error('Oracle Error Event', err)
      knex.client.releaseConnection(connection)
    })

    select.on('end', () => {
      log.info('Destroying the Stream')
      select.destroy()
    })

    select.on('close', () => {
      log.info('Oracle Close Event')
      knex.client.releaseConnection(connection)
      select = null
      connection = null
    })

    return select
  }

}
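
For anyone not familiar with node-oracledb: selectStream above is a thin wrapper around connection.queryStream(). Used on its own (outside knex), the same pattern looks roughly like this; the connection details are placeholders:

// Minimal node-oracledb queryStream sketch; user/password/connectString are placeholders.
const oracledb = require('oracledb')

async function directQueryStream(sql) {
  const connection = await oracledb.getConnection({
    user: 'myUser',
    password: 'myPassword',
    connectString: 'myHost/myService'
  })

  const stream = connection.queryStream(sql, [], {
    fetchArraySize: 10000,
    outFormat: oracledb.OBJECT
  })

  stream.on('end', () => stream.destroy())       // per the node-oracledb docs
  stream.on('close', () => connection.close())   // release the connection once fully closed
  stream.on('error', (err) => console.error(err))

  return stream
}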

My index/stream pipeline code:

// Requires used below; log, oracleStream, core, baseUrl, user, JobFailedError
// and getReindexInfo come from elsewhere in this module and are omitted here.
const https = require('https')
const util = require('util')
const stream = require('stream')
const { Transform } = stream
const moment = require('moment')
const camelize = require('camelize2')

async function indexJob() {
    const reindexStartTime = new moment().local()

    let rowCount = 0
    log.info('Reindex Started at', reindexStartTime.format())
    let queryNumber = 1
    const partitionedQueries = ['Select * from table where 1=1', 'Select * from table where 2=2', 'Select * from table where 3=3'] // There would be 20 queries in this array
    let partitionedQueriesLength = partitionedQueries.length

    while (partitionedQueries.length > 0) {
      let query = partitionedQueries.pop()

      log.info('RUNNING Query', {
        queryNumber: `${queryNumber++} of ${partitionedQueriesLength}`,
        query: query
      })

      let databaseStream = await oracleStream.selectStream(query, [], 10000) // 10k is the Oracle fetch size

      databaseStream.on('data', () => {
        rowCount++
      })

      let logEveryFiveSec = setInterval(() => {
        log.info('Status: ', getReindexInfo(reindexStartTime, rowCount))
      }, 5000)

      try {
        let pipeline = util.promisify(stream.pipeline)
        await pipeline(
          databaseStream,
          camelizeAndStringify(),
          streamReindex(core)
        )
      } catch (err) {
        databaseStream.destroy(err)
        throw new JobFailedError(err)
      } finally {
        databaseStream.destroy()
        clearInterval(logEveryFiveSec)
      }
    }
  }

  function camelizeAndStringify() {
    let first = true
    const serialize = new Transform({
      objectMode: true,
      highWaterMark: 1000,
      transform(chunk, encoding, callback) {
        if (first) {
          this.push('[' + JSON.stringify(camelize(chunk)))
          first = false
        } else {
          this.push(',' + JSON.stringify(camelize(chunk)))
        }
        callback()
        chunk = null
      },
      flush(callback) {
        this.push(']')
        callback()
      }
    })
    return serialize
  }


function streamReindex(core) {
    const updateUrl = baseUrl + core + '/update'
    const options = {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      'auth': `${user.username}:${user.password}`,
    }
    let postStream = https.request(updateUrl, options, (res) => {
      let response = {
        status: {
          code: res.statusCode,
          message: res.statusMessage
        },
        headers: res.headers,
      }
      if (res.statusCode !== 200) {
        postStream.destroy(new Error(JSON.stringify(response)))
      }
    })
    postStream.on('error', (err) => {
      throw new Error(err)
    })
    postStream.on('socket', (socket) => {
      socket.setKeepAlive(true, 110000)
    })
    return postStream
  }
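
For reference, feeding a couple of fake rows through camelizeAndStringify() by hand shows the output shape; this little harness assumes the function from the listing above is in scope:

// Feed fake rows through the transform above and print what it emits.
const serialize = camelizeAndStringify()
let out = ''
serialize.on('data', (chunk) => { out += chunk })
serialize.on('end', () => console.log(out))
serialize.write({ first_name: 'Ada' })
serialize.write({ first_name: 'Alan' })
serialize.end()
// Logs something like: [{"firstName":"Ada"},{"firstName":"Alan"}]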

Source: https://stackoverflow.com/questions/61259942/nodejs-streams-help-find-my-memory-leak
