What is the fastest way to write a lot of documents to Firestore?

前端 未结 2 1908
后悔当初
后悔当初 2020-11-28 06:15

I need to write a large number of documents to Firestore.

What is the fastest way to do this in Node.js?

2条回答
  •  长情又很酷
    2020-11-28 07:00

    As noted in a comment to the OP, I've had the opposite experience when writing documents to Firestore inside a Cloud Function.

    TL;DR: Parallel individual writes are over 5x slower than parallel batch writes when writing 1200 documents to Firestore.

    The only explanation I can think of for this is some sort of bottleneck or request rate limiting happening between Google Cloud Functions and Firestore. It's a bit of a mystery.

    Here's the code for both methods I benchmarked:

    const functions = require('firebase-functions');
    const admin = require('firebase-admin');
    
    
    // Initialise the Admin SDK with no explicit options.
    admin.initializeApp();
    // Shared Firestore handle used by both callable functions below.
    const db = admin.firestore();
    
    
    // Parallel Batch Writes
    exports.cloneAppBatch = functions.https.onCall((data, context) => {
    
        return new Promise((resolve, reject) => {
    
            let fromAppKey = data.appKey;
            let toAppKey = db.collection('/app').doc().id;
    
    
            // Clone/copy data from one app subcollection to another
            let startTimeMs = Date.now();
            let docs = 0;
    
            // Write the app document (and ensure cold start doesn't affect timings below)
            db.collection('/app').doc(toAppKey).set({ desc: 'New App' }).then(() => {
    
                // Log Benchmark
                functions.logger.info(`[BATCH] 'Write App Config Doc' took ${Date.now() - startTimeMs}ms`);
    
    
                // Get all documents in app subcollection
                startTimeMs = Date.now();
    
                return db.collection(`/app/${fromAppKey}/data`).get();
    
            }).then(appDataQS => {
    
                // Log Benchmark
                functions.logger.info(`[BATCH] 'Read App Data' took ${Date.now() - startTimeMs}ms`);
    
    
                // Batch up documents and write to new app subcollection
                startTimeMs = Date.now();
    
                let commits = [];
                let bDocCtr = 0;
                let batch = db.batch();
    
                appDataQS.forEach(docSnap => {
    
                    let doc = docSnap.data();
                    let docKey = docSnap.id;
                    docs++;
    
                    let docRef = db.collection(`/app/${toAppKey}/data`).doc(docKey);
    
                    batch.set(docRef, doc);
                    bDocCtr++
    
                    if (bDocCtr >= 500) {
                        commits.push(batch.commit());
                        batch = db.batch();
                        bDocCtr = 0;
                    }
    
                });
    
                if (bDocCtr > 0) commits.push(batch.commit());
    
                Promise.all(commits).then(results => {
                    // Log Benchmark
                    functions.logger.info(`[BATCH] 'Write App Data - ${docs} docs / ${commits.length} batches' took ${Date.now() - startTimeMs}ms`);
                    resolve(results);
                });
             
            }).catch(err => {
                reject(err);
            });
    
        });
    
    });
    
    
    // Parallel Individual Writes
    exports.cloneAppNoBatch = functions.https.onCall((data, context) => {
    
        return new Promise((resolve, reject) => {
    
            let fromAppKey = data.appKey;
            let toAppKey = db.collection('/app').doc().id;
    
    
            // Clone/copy data from one app subcollection to another
            let startTimeMs = Date.now();
            let docs = 0;
    
            // Write the app document (and ensure cold start doesn't affect timings below)
            db.collection('/app').doc(toAppKey).set({ desc: 'New App' }).then(() => {
    
                // Log Benchmark
                functions.logger.info(`[INDIVIDUAL] 'Write App Config Doc' took ${Date.now() - startTimeMs}ms`);
    
    
                // Get all documents in app subcollection
                startTimeMs = Date.now();
    
                return db.collection(`/app/${fromAppKey}/data`).get();
    
            }).then(appDataQS => {
    
                // Log Benchmark
                functions.logger.info(`[INDIVIDUAL] 'Read App Data' took ${Date.now() - startTimeMs}ms`);
    
    
                // Gather up documents and write to new app subcollection
                startTimeMs = Date.now();
    
                let commits = [];
    
                appDataQS.forEach(docSnap => {
    
                    let doc = docSnap.data();
                    let docKey = docSnap.id;
                    docs++;
                        
                    // Parallel individual writes
                    commits.push(db.collection(`/app/${toAppKey}/data`).doc(docKey).set(doc));
            
                });
    
                Promise.all(commits).then(results => {
                    // Log Benchmark
                    functions.logger.info(`[INDIVIDUAL] 'Write App Data - ${docs} docs' took ${Date.now() - startTimeMs}ms`);
                    resolve(results);
                });
             
            }).catch(err => {
                reject(err);
            });
    
        });
    
    });
    

    The specific results were (average of 3 runs each):

    Batch Writes:

    Read 1200 docs - 2.4 secs / Write 1200 docs - 1.8 secs

    Individual Writes:

    Read 1200 docs - 2.4 secs / Write 1200 docs - 10.5 secs

    Note: These results are a lot better than what I was getting the other day - maybe Google was having a bad day - but the relative performance between batch and individual writes remains the same. Would be good to see if anyone else has had a similar experience.

提交回复
热议问题