Remove all fields that are null

前端 未结 5 496
-上瘾入骨i
-上瘾入骨i 2021-01-02 11:03

How can I remove all fields that are null from all documents of a given collection?


I have a collection of documents such as:

{
           


        
5条回答
  •  误落风尘
    2021-01-02 11:48

    This is an important question, since MongoDB cannot index null values (i.e. do not query for nulls, or you will be waiting for a long time). It is therefore best to avoid nulls entirely and set default values using `$setOnInsert`.

    Here is a recursive solution to removing nulls:

    /**
     * Collects the dotted key path of every field whose value is `null` or
     * `undefined`, walking nested objects and arrays recursively.
     *
     * Each matching path is recorded as `nullKeys[path] = 1`, so the accumulator
     * can be passed directly as the argument of a `$unset` update. Array elements
     * are addressed by their index (e.g. "other.0"). Properties named "_id" are
     * skipped at every nesting level.
     *
     * @param {Object} doc - document, sub-document or array to inspect
     * @param {?string} keyName - dotted path of `doc` inside the root document;
     *                            pass a falsy value for the root itself
     * @param {Object} nullKeys - accumulator object, mutated in place
     */
    function getNullKeysRecursively(doc, keyName, nullKeys)
    {
        // Borrow the method from Object.prototype: a stored document may itself
        // contain a field called "hasOwnProperty", which would shadow it.
        var hasOwn = Object.prototype.hasOwnProperty;

        for (var item_property in doc)
        {
            // SKIP INHERITED (PROTOTYPE) PROPERTIES
            if (!hasOwn.call(doc, item_property))
                continue;
            // SKIP ID FIELD
            if (item_property === "_id")
                continue;

            // FULL KEY NAME (DOT NOTATION FOR SUB-DOCUMENTS)
            var fullKeyName = keyName ? keyName + "." + item_property : item_property;

            var value = doc[item_property];

            // NULL FIELDS - MODIFY THIS BLOCK TO ADD CONSTRAINTS
            if (value === null || value === undefined)
                nullKeys[fullKeyName] = 1;

            // RECURSE OBJECTS / ARRAYS (ARRAYS ARE ALSO instanceof Object)
            else if (value instanceof Object)
                getNullKeysRecursively(value, fullKeyName, nullKeys);
        }
    }
    
    /**
     * Removes every field whose value is `null` or `undefined` from all
     * documents of a collection.
     *
     * The collection is processed in pages of `LIMIT` documents, ordered by
     * `_id`, so tune `LIMIT` to the memory you have available. Only documents
     * that actually contain such fields are updated, and each update is a
     * targeted `$unset` rather than a rewrite of the whole document. The updates
     * of one page are sent together through `bulkWrite`.
     *
     * NOTE(review): paging starts from the all-zero ObjectId, which presumably
     * assumes every `_id` in the collection is an ObjectId - confirm for
     * collections that use other `_id` types.
     *
     * @param {string} collectionName - name of the collection to clean
     */
    function removeNulls(collectionName)
    {
        var coll = db.getCollection(collectionName);
        var lastId = ObjectId("000000000000000000000000");
        var LIMIT = 10000;
        while (true)
        {
            // GET THE NEXT PAGE OF DOCUMENTS.
            // The explicit sort makes the last document of the page the one with
            // the largest _id, which the "$gt: lastId" paging depends on.
            var page = coll.find({ _id: { $gt: lastId } }).sort({ _id: 1 }).limit(LIMIT);
            if (! page.hasNext())
                break;

            // BUILD BULK OPERATION
            var arrBulkOps = [];
            page.forEach(function(item_doc)
            {
                lastId = item_doc._id;

                var nullKeys = {};
                getNullKeysRecursively(item_doc, null, nullKeys);

                // ONLY UPDATE DOCUMENTS THAT NEED IT.
                // UNSET INDIVIDUAL FIELDS, RATHER THAN REWRITE THE ENTIRE DOC.
                if (Object.keys(nullKeys).length > 0)
                {
                    arrBulkOps.push(
                        { updateOne: {
                                "filter": { _id: item_doc._id },
                                "update": { $unset: nullKeys }
                            } }
                    );
                }
            });

            // An empty batch is rejected by bulkWrite, so skip pages in which
            // no document needed cleaning instead of aborting the whole run.
            if (arrBulkOps.length > 0)
                coll.bulkWrite(arrBulkOps, { ordered: false } );
        }
    }
    
    // Entry point: run the cleanup on the collection named 'my_collection'.
    // Replace the name with the collection you want to clean.
    removeNulls('my_collection');
    

    document before:

    {
        "_id": ObjectId("5a53ed8f6f7c4d95579cb87c"),
        "first_name": null,
        "last_name": "smith",
        "features": {
            "first": {
                "a": 1,
                "b": 2,
                "c": null
            },
            "second": null,
            "third" : {},
            "fourth" : []
        },
        "other": [ 
            null, 
            123, 
            {
                "a": 1,
                "b": "hey",
                "c": null
            }
        ]
    }
    

    document after:

    {
        "_id" : ObjectId("5a53ed8f6f7c4d95579cb87c"),
        "last_name" : "smith",
        "features" : {
            "first" : {
                "a" : 1,
                "b" : 2
            }
        },
        "other" : [ 
            null, 
            123, 
            {
                "a" : 1,
                "b" : "hey"
            }
        ]
    }
    

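    As you can see, it removes null, undefined, empty objects and empty arrays. Note that null elements inside an array (such as the first element of "other" above) are left in place, because `$unset` on an array element sets it to null rather than removing it. If you need it to be more or less aggressive, modify the block marked "NULL FIELDS - MODIFY THIS BLOCK TO ADD CONSTRAINTS".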

    edits welcome, especially @stennie

提交回复
热议问题