Create a zip file on S3 from files on S3 using Lambda Node

Asked by 不思量自难忘° on 2020-12-05 10:29

I need to create a Zip file that consists of a selection of files (videos and images) located in my s3 bucket.

The problem at the moment using my code below is that

3 Answers
  • 2020-12-05 10:35

    I formatted the code based on @iocoker's answer.

    Main entry:

    // index.js
    
    'use strict';
    const S3Zip = require('./s3-zip')
    
    const params = {
      files: [
        {
          fileName: '1.jpg',
          key: 'key1.JPG'
        },
        {
          fileName: '2.jpg',
          key: 'key2.JPG'
        }
      ],
      zippedFileKey: 'zipped-file-key.zip'
    }
    
    exports.handler = async event => {
      const s3Zip = new S3Zip(params);
      await s3Zip.process();
    
      return {
        statusCode: 200,
        body: JSON.stringify(
          {
            message: 'Zip file created successfully!'
          }
        )
      };
    
    }
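
    One thing to note: the handler above never passes a bucket name, so S3Zip falls back to its 'default-bucket' placeholder. A minimal variant, assuming a hypothetical BUCKET_NAME environment variable is set on the function:

    // inside the handler, pass the bucket explicitly (BUCKET_NAME is an assumed env var)
    const s3Zip = new S3Zip(params, process.env.BUCKET_NAME);
    await s3Zip.process();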
    
    

    Zip file utility:

    // s3-zip.js
    
    'use strict';
    const AWS = require("aws-sdk");
    
    const Archiver = require('archiver');
    const Stream = require('stream');
    
    const https = require('https');
    const sslAgent = new https.Agent({
      keepAlive: true,
      rejectUnauthorized: true
    });
    sslAgent.setMaxListeners(0);
    AWS.config.update({
      httpOptions: {
        agent: sslAgent,
      },
      region: 'us-east-1'
    });
    
    module.exports = class S3Zip {
      constructor(params, bucketName = 'default-bucket') {
        this.params = params;
        this.BucketName = bucketName;
      }
    
      async process() {
        const { params, BucketName } = this;
        const s3 = new AWS.S3({ apiVersion: '2006-03-01', params: { Bucket: BucketName } });
    
        // create readstreams for all the output files and store them
        const s3FileDwnldStreams = params.files.map(item => {
          const stream = s3.getObject({ Key: item.key }).createReadStream();
          return {
            stream,
            fileName: item.fileName
          }
        });
    
        const streamPassThrough = new Stream.PassThrough();
        // Pipe the zip archive through a PassThrough stream so it can be uploaded to S3 as it is built
        const uploadParams = {
          ACL: 'private',
          Body: streamPassThrough,
          ContentType: 'application/zip',
          Key: params.zippedFileKey
        };
    
        const s3Upload = s3.upload(uploadParams, (err, data) => {
          if (err) {
            console.error('upload err', err)
          } else {
            console.log('upload data', data);
          }
        });
    
        s3Upload.on('httpUploadProgress', progress => {
          // console.log(progress); // { loaded: 4915, total: 192915, part: 1, key: 'foo.jpg' }
        });
    
        // create the archiver
        const archive = Archiver('zip', {
          zlib: { level: 0 } // level 0 = store only (no compression)
        });
        archive.on('error', (error) => {
          throw new Error(`${error.name} ${error.code} ${error.message} ${error.path} ${error.stack}`);
        });
    
        // connect the archiver to the upload stream and pipe all the download streams into it
        await new Promise((resolve, reject) => {
          console.log("Starting upload of the output Files Zip Archive");

          // pass the function references; calling resolve()/reject() here would
          // settle the promise immediately instead of waiting for the events
          streamPassThrough.on('close', resolve);
          streamPassThrough.on('end', resolve);
          streamPassThrough.on('error', reject);

          archive.pipe(streamPassThrough);
          s3FileDwnldStreams.forEach((s3FileDwnldStream) => {
            archive.append(s3FileDwnldStream.stream, { name: s3FileDwnldStream.fileName });
          });
          archive.finalize();

        }).catch((error) => {
          throw new Error(`${error.code} ${error.message} ${error.data}`);
        });
    
        // Finally wait for the uploader to finish
        await s3Upload.promise();
    
      }
    }
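
    For a quick check before deploying, a minimal local smoke test (assuming local AWS credentials that can read and write the bucket; local-test.js is a hypothetical file name):

    // local-test.js
    const { handler } = require('./index');

    handler({})
      .then(res => console.log('handler response:', res))
      .catch(err => console.error('handler failed:', err));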
    
  • 2020-12-05 10:48

    Using streams may be tricky, as I'm not sure how you could pipe multiple streams into a single object. I've done this several times using standard file objects instead. It's a multi-step process and it's quite fast. Remember that Lambda runs on Linux, so you have all the usual Linux resources at hand, including the system /tmp directory. The steps (a sketch follows the list):

    1. Create a sub-directory in /tmp called "transient", or whatever works for you
    2. Use s3.getObject() and write file objects to /tmp/transient
    3. Use the GLOB package to generate an array[] of paths from /tmp/transient
    4. Loop the array and zip.addLocalFile(array[i]);
    5. zip.writeZip('/tmp/files.zip');
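
    A minimal sketch of those steps, assuming the adm-zip and glob packages (v7-style glob API), a hypothetical bucket name, and hardcoded keys:

    // zip-via-tmp.js - a sketch of the steps above (bucket and keys are placeholders)
    'use strict';
    const fs = require('fs');
    const path = require('path');
    const AWS = require('aws-sdk');
    const glob = require('glob');
    const AdmZip = require('adm-zip');

    const s3 = new AWS.S3({ apiVersion: '2006-03-01' });
    const TMP_DIR = '/tmp/transient';   // step 1: scratch space inside Lambda's /tmp

    exports.handler = async event => {
      const bucket = 'your-bucket-name';   // assumption: replace with your bucket
      const keys = ['1.jpg', '2.jpg'];     // assumption: the S3 keys to zip

      fs.mkdirSync(TMP_DIR, { recursive: true });

      // step 2: download each object and write it to /tmp/transient
      await Promise.all(keys.map(async key => {
        const obj = await s3.getObject({ Bucket: bucket, Key: key }).promise();
        fs.writeFileSync(path.join(TMP_DIR, path.basename(key)), obj.Body);
      }));

      // step 3: collect the downloaded file paths
      const files = glob.sync(`${TMP_DIR}/*`);

      // step 4: add every local file to the archive
      const zip = new AdmZip();
      files.forEach(file => zip.addLocalFile(file));

      // step 5: write the archive to /tmp, then push it back to S3
      zip.writeZip('/tmp/files.zip');
      await s3.upload({
        Bucket: bucket,
        Key: 'files.zip',
        Body: fs.createReadStream('/tmp/files.zip')
      }).promise();

      return { statusCode: 200 };
    };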
  • 2020-12-05 10:57

    Okay, I had to do this today and it works. It appends each object's Buffer directly to the archive stream, so no disk is involved and Lambda's /tmp size limit isn't an issue here:

    'use strict';
    
    const AWS = require("aws-sdk");
    AWS.config.update( { region: "eu-west-1" } );
    const s3 = new AWS.S3( { apiVersion: '2006-03-01'} );
    
    const _archiver = require('archiver');
    
    //This returns us a stream.. consider it as a real pipe sending fluid to S3 bucket.. Don't forget it
    const streamTo = (_bucket, _key) => {
    	var stream = require('stream');
    	var _pass = new stream.PassThrough();
    	s3.upload( { Bucket: _bucket, Key: _key, Body: _pass }, (_err, _data) => { /*...Handle Errors Here*/ } );
    	return _pass;
    };
          
    exports.handler = async (_req, _ctx, _cb) => {
    	var _keys = ['list of your file keys in s3'];
    	
    var _list = await Promise.all(_keys.map(_key => new Promise((_resolve, _reject) => {
            // .promise() converts the AWS.Request into a real Promise we can chain on
            s3.getObject({Bucket:'bucket-name', Key:_key}).promise()
                .then(_data => _resolve( { data: _data.Body, name: `${_key.split('/').pop()}` } ))
                .catch(_reject);
        }
    ))).catch(_err => { throw new Error(_err) } );
    
        await new Promise((_resolve, _reject) => { 
            var _myStream = streamTo('bucket-name', 'fileName.zip');		//Now we instantiate that pipe...
            var _archive = _archiver('zip');
        _archive.on('error', _reject);   // reject the promise instead of throwing inside an event handler
            
            //Your promise gets resolved when the fluid stops running... so that's when you get to close and resolve
            _myStream.on('close', _resolve);
            _myStream.on('end', _resolve);
            _myStream.on('error', _reject);
            
        _archive.pipe(_myStream);           //Pass that pipe to _archive so it can push the fluid straight down to S3 bucket
        _list.forEach(_itm => _archive.append(_itm.data, { name: _itm.name } ) );      //And then we start adding files to it
        _archive.finalize();                //Tell it, that's all we want to add. Then when it finishes, the promise will resolve in one of those events up there
        }).catch(_err => { throw new Error(_err) } );
        
    return { };     // async handlers return the response directly rather than using the callback
    };
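
    If the key list should come from the invocation payload instead of being hardcoded, a small variant of the _keys line (assuming, hypothetically, that the caller sends { "keys": [...] } in the event):

    var _keys = Array.isArray(_req.keys) ? _req.keys : [];   // assumption: keys arrive in the event payload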
