I need to write a large number of documents to Firestore.
What is the fastest way to do this in Node.js?
As noted in a comment to the OP, I've had the opposite experience when writing documents to Firestore inside a Cloud Function.
TL;DR: Parallel individual writes are over 5x slower than parallel batch writes when writing 1200 documents to Firestore.
The only explanation I can think of for this is some sort of bottleneck or request rate limiting happening between Google Cloud Functions and Firestore. It's a bit of a mystery.
Here's the code for both methods I benchmarked:
const functions = require('firebase-functions');
const admin = require('firebase-admin');
admin.initializeApp();
const db = admin.firestore();
// Parallel Batch Writes
exports.cloneAppBatch = functions.https.onCall((data, context) => {
return new Promise((resolve, reject) => {
let fromAppKey = data.appKey;
let toAppKey = db.collection('/app').doc().id;
// Clone/copy data from one app subcollection to another
let startTimeMs = Date.now();
let docs = 0;
// Write the app document (and ensure cold start doesn't affect timings below)
db.collection('/app').doc(toAppKey).set({ desc: 'New App' }).then(() => {
// Log Benchmark
functions.logger.info(`[BATCH] 'Write App Config Doc' took ${Date.now() - startTimeMs}ms`);
// Get all documents in app subcollection
startTimeMs = Date.now();
return db.collection(`/app/${fromAppKey}/data`).get();
}).then(appDataQS => {
// Log Benchmark
functions.logger.info(`[BATCH] 'Read App Data' took ${Date.now() - startTimeMs}ms`);
// Batch up documents and write to new app subcollection
startTimeMs = Date.now();
let commits = [];
let bDocCtr = 0;
let batch = db.batch();
appDataQS.forEach(docSnap => {
let doc = docSnap.data();
let docKey = docSnap.id;
docs++;
let docRef = db.collection(`/app/${toAppKey}/data`).doc(docKey);
batch.set(docRef, doc);
bDocCtr++
if (bDocCtr >= 500) {
commits.push(batch.commit());
batch = db.batch();
bDocCtr = 0;
}
});
if (bDocCtr > 0) commits.push(batch.commit());
Promise.all(commits).then(results => {
// Log Benchmark
functions.logger.info(`[BATCH] 'Write App Data - ${docs} docs / ${commits.length} batches' took ${Date.now() - startTimeMs}ms`);
resolve(results);
});
}).catch(err => {
reject(err);
});
});
});
// Parallel Individual Writes
exports.cloneAppNoBatch = functions.https.onCall((data, context) => {
return new Promise((resolve, reject) => {
let fromAppKey = data.appKey;
let toAppKey = db.collection('/app').doc().id;
// Clone/copy data from one app subcollection to another
let startTimeMs = Date.now();
let docs = 0;
// Write the app document (and ensure cold start doesn't affect timings below)
db.collection('/app').doc(toAppKey).set({ desc: 'New App' }).then(() => {
// Log Benchmark
functions.logger.info(`[INDIVIDUAL] 'Write App Config Doc' took ${Date.now() - startTimeMs}ms`);
// Get all documents in app subcollection
startTimeMs = Date.now();
return db.collection(`/app/${fromAppKey}/data`).get();
}).then(appDataQS => {
// Log Benchmark
functions.logger.info(`[INDIVIDUAL] 'Read App Data' took ${Date.now() - startTimeMs}ms`);
// Gather up documents and write to new app subcollection
startTimeMs = Date.now();
let commits = [];
appDataQS.forEach(docSnap => {
let doc = docSnap.data();
let docKey = docSnap.id;
docs++;
// Parallel individual writes
commits.push(db.collection(`/app/${toAppKey}/data`).doc(docKey).set(doc));
});
Promise.all(commits).then(results => {
// Log Benchmark
functions.logger.info(`[INDIVIDUAL] 'Write App Data - ${docs} docs' took ${Date.now() - startTimeMs}ms`);
resolve(results);
});
}).catch(err => {
reject(err);
});
});
});
The specific results were (average of 3 runs each):
Batch Writes:
Read 1200 docs - 2.4 secs / Write 1200 docs - 1.8 secs
Individual Writes:
Read 1200 docs - 2.4 secs / Write 1200 docs - 10.5 secs
Note: These results are a lot better than what I was getting the other day - maybe Google was having a bad day - but the relative performance between batch and individual writes remains the same. Would be good to see if anyone else has had a similar experience.