Is there any option to perform bulk upserts with Mongoose? Basically, I have an array and want to insert each element if it does not exist, or update it if it does. (I am using custo
I had to achieve this recently while storing products in my e-commerce app. My database used to time out because I had to upsert 10,000 items every 4 hours. One option was to set socketTimeoutMS and connectTimeoutMS in Mongoose when connecting to the database, but that felt somewhat hacky and I did not want to change the database's default connection timeouts. I also see that the solution by @Neil Lunn takes a simple synchronous approach of taking a modulus inside the for loop. Here is an asynchronous version of mine that I believe does the job much better:
// Maximum number of products sent to the database in a single bulk operation.
// Declared with `const` because nothing in this file reassigns it.
const BATCH_SIZE = 500;
/**
 * Adds `Array.prototype.chunk(groupsize)`, which splits an array into
 * consecutive sub-arrays holding at most `groupsize` elements each. The last
 * sub-array holds whatever is left over; an empty array yields `[]`.
 *
 * The method is defined as non-enumerable so that it does not appear as an
 * extra key when any array in the process is walked with `for...in`.
 *
 * @param {number} groupsize - Positive integer: maximum length of each chunk.
 * @returns {Array<Array>} New array of shallow slices; the receiver is not modified.
 * @throws {RangeError} If `groupsize` is not a positive integer. A size of 0
 *   would otherwise never advance the loop, and a negative or NaN size would
 *   silently produce no chunks at all.
 */
Object.defineProperty(Array.prototype, 'chunk', {
    value: function (groupsize) {
        if (!Number.isInteger(groupsize) || groupsize <= 0) {
            throw new RangeError('chunk size must be a positive integer, got ' + String(groupsize));
        }
        const sets = [];
        // Step through the array one chunk-width at a time; slice() clamps the
        // end index, so the final chunk may be shorter than groupsize.
        for (let start = 0; start < this.length; start += groupsize) {
            sets.push(this.slice(start, start + groupsize));
        }
        return sets;
    },
    writable: true,
    configurable: true,
    enumerable: false
});
/**
 * Upserts `products` into the Product collection in batches of BATCH_SIZE and,
 * once every batch has been attempted, removes documents that were not
 * refreshed by this run or whose discount is 0.
 *
 * Batches are processed one after another: each bulk operation is executed,
 * its callback schedules the next batch with `setTimeout(..., 0)`, and only
 * after the last batch does the clean-up run. The function returns
 * immediately; progress and results are reported through console output only.
 *
 * Each product is matched on its `_id`. Every upsert sets `updatedAt` to a
 * single timestamp taken at the start of the run, and newly inserted documents
 * additionally receive `createdAt` with the same timestamp.
 *
 * The clean-up step deletes everything with `updatedAt` older than this run's
 * timestamp. It is therefore skipped if ANY batch failed; otherwise products
 * belonging to a failed batch would be deleted merely because their refresh
 * did not go through. It is also skipped when nothing was upserted at all.
 *
 * The caller's product objects are not modified; `updatedAt` is applied to a
 * shallow copy of each product.
 *
 * @param {Array<Object>} products - Products to upsert; each needs an `_id`.
 *   Relies on `Array.prototype.chunk` being installed elsewhere in this file.
 * @returns {undefined}
 */
function upsertDiscountedProducts(products) {
    // Divide the input into batches of BATCH_SIZE.
    const chunks = products.chunk(BATCH_SIZE);
    console.log('Number of chunks ', chunks.length);

    // Single timestamp for the whole run: it marks every upserted document and
    // is later the cut-off that identifies documents this run did not touch.
    const timestamp = new Date();

    // Index of the batch currently being processed.
    let current = 0;
    // Running totals accumulated from each successful bulk result.
    let inserted = 0;
    let upserted = 0;
    let matched = 0;
    let modified = 0;
    let removed = 0;
    // True once at least one batch has executed without error.
    let upsertHappened = false;
    // Number of batches whose bulk execution reported an error.
    let failedChunks = 0;

    // Kick off processing with the first batch.
    load();

    /**
     * Queues an upsert for every product in the current batch and executes the
     * bulk operation; when no batches remain, hands over to finish().
     */
    function load() {
        if (current >= chunks.length) {
            console.log("Calling finish");
            finish();
            return;
        }

        console.log('Current value ', current);
        // A fresh bulk operation per batch.
        const bulk = models.Product.collection.initializeUnorderedBulkOp();

        for (const item of chunks[current]) {
            // Copy so that stamping updatedAt does not alter the caller's object.
            // NOTE(review): if a product already carries its own `createdAt`,
            // $set and $setOnInsert would both target that path - confirm the
            // inputs never include it.
            const fields = Object.assign({}, item, { updatedAt: timestamp });
            bulk.find({ _id: item._id })
                .upsert()
                .updateOne({
                    "$set": fields,
                    // Only applied when the upsert results in an insert.
                    "$setOnInsert": {
                        "createdAt": timestamp
                    }
                });
        }

        bulk.execute((error, result) => {
            if (error) {
                // Remember the failure so the stale-document clean-up is skipped.
                failedChunks++;
                console.error('Error while inserting products' + JSON.stringify(error));
            } else {
                upsertHappened = true;
                inserted += result.nInserted;
                upserted += result.nUpserted;
                matched += result.nMatched;
                modified += result.nModified;
                removed += result.nRemoved;
            }
            // Carry on with the remaining batches whether or not this one failed.
            next();
        });
    }

    /**
     * Advances to the next batch. load() is deferred through a zero-delay
     * timer rather than called directly from the execute callback.
     */
    function next() {
        current++;
        setTimeout(load, 0);
    }

    /**
     * Logs the accumulated totals and starts the clean-up when it is safe.
     */
    function finish() {
        console.log('Inserted ', inserted + ' Upserted ', upserted, ' Matched ', matched, ' Modified ', modified, ' Removed ', removed);
        if (failedChunks > 0) {
            // Products in the failed batches still carry an old updatedAt;
            // running the clean-up now would delete them.
            console.error('Skipping removal of stale products: ' + failedChunks + ' chunk(s) failed to upsert');
            return;
        }
        if (upsertHappened) {
            console.log("Calling remove");
            remove();
        }
    }

    /**
     * Remove all the items that were not updated in the recent upsert or those
     * items with a discount of 0.
     */
    function remove() {
        models.Product.remove(
            {
                "$or": [
                    { "updatedAt": { "$lt": timestamp } },
                    { "discount": { "$eq": 0 } }
                ]
            },
            (error, obj) => {
                if (error) {
                    console.log('Error while removing', JSON.stringify(error));
                    return;
                }
                if (obj.result.n === 0) {
                    console.log('Nothing was removed');
                } else {
                    console.log('Removed ' + obj.result.n + ' documents');
                }
            }
        );
    }
}