Flattening mongoDB schema

◇◆丶佛笑我妖孽 提交于 2019-12-24 08:18:06

问题


I have an existing deeply nested mongoDB schema that I must flatten as I have a complex query that cannot be efficiently made with the current structure. Here is the MWE of the schema:

db.test.insert({
    "_id" : ObjectId("58e574a768afb6085ec3a388"),
    "details" : [
            {
                "_id" : ObjectId("58e55f0f68afb6085ec3a2cc"),
                "a" : [
                    {
                        "unit" : "08",
                        "size" : "5",
                        "pos" : "Far",
                        "_id" : ObjectId("58e55f0f68afb6085ec3a2d0")
                    }
                ],
                "b" : [
                    {
                        "unit" : "08",
                        "size" : "5",
                        "pos" : "Far",
                        "_id" : ObjectId("58e55f0f68afb6085ec3a2cd")
                    }
                ],
                "c" : [
                    {
                        "unit" : "08",
                        "size" : "3",
                        "pos" : "Far",
                        "_id" : ObjectId("58e55f0f68afb6085ec3a2ce")
                    }
                ],
                "d" : [
                    {
                        "unit" : "08",
                        "size" : "5",
                        "pos" : "Far",
                        "_id" : ObjectId("58e55f0f68afb6085ec3a2cf")
                    }
                ]
            }
        ]
    })

I want to flatten out the schema. The desired result is this:

"_id" : ObjectId("58e574a768afb6085ec3a388"),
"tests" : [
        {
            "_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
            "aUnit" : "08",
            "aSize" : "5",
            "aPos" : "Far",
            "bPos" : "Far",
            "bSize" : "5",
            "bUnit" : "08",
            "cPos" : "Far",
            "cSize" : "3",
            "cUnit" : "08",
            "dPos" : "Far",
            "dSize" : "5",
            "dUnit" : "08"
                }
            ]

I'm willing to do each entry type, one at a time, and I thought I had a method to do so but it is not working. Here is what I tried:

// NOTE(review): the sample document inserted above has no "tests" field, so
// this filter cannot match it and the forEach callback never runs.
db.test.find({"tests.$.details.a.unit":{$exists:true}}).forEach(function(doc) {      
    // `tests` is not declared anywhere in this snippet; only `doc` is in scope.
    doc.tests = {aUnit:tests.details.a.unit};
    delete tests.details.a.unit;
    db.test.save(doc);
    });

However, this changes nothing. How can I improve my query in order to flatten my schema?

EDITED: I realized that the MWE had a minor error as compared to the one I intended to use it on. I was closing each entry. For example, "a" : [{ ... }], was incorrectly written as {"a" : [{ ... }]},. However, it is now updated.


回答1:


New Response

Print the data

// Dry run: flatten every "details" entry in memory and print the resulting
// document. Nothing is written back to the collection.
db.test.find().forEach(doc => {
  // Builds the flattened key, e.g. ("a", "unit") -> "aUnit".
  const flatKey = (group, field) =>
    group + field.charAt(0).toUpperCase() + field.substr(1);

  doc.details = doc.details.map(detail => {
    // Object.keys() returns a snapshot, so the keys added below are not revisited.
    const groups = Object.keys(detail).filter(name => name !== "_id");

    groups.forEach(group => {
      detail[group].forEach(item => {
        Object.keys(item).forEach(field => {
          if (field === "_id") return;
          detail[flatKey(group, field)] = item[field];
        });
      });
      // The nested array is dropped once its fields have been hoisted.
      delete detail[group];
    });

    return detail;
  });

  printjson(doc);
});

Update the data

// Flatten every "details" entry and persist the result with batched
// bulkWrite() calls. Intended to be run once from the mongo shell, after the
// printjson() dry run has been checked.
//
// `ops` must exist before the loop appends to it.
let ops = [];

db.test.find().forEach(doc => {
  doc.details = doc.details.map( detail => {
    // Object.keys() returns a snapshot, so the flattened keys added below
    // are not visited by this loop.
    Object.keys(detail).filter( k => k !== "_id" ).forEach( k => {
      detail[k].forEach( item => {
        Object.keys(item).filter(i => i !== "_id" ).forEach( inner => {
          // e.g. k = "a", inner = "unit" -> "aUnit"
          detail[k + inner.charAt(0).toUpperCase() + inner.substr(1)]
            = item[inner];
        })
      });
      // The nested array is dropped once its fields have been hoisted.
      delete detail[k];
    });
    return detail;
  });

  ops.push({ "updateOne": {
    "filter": { "_id": doc._id },
    // Overwrite the top-level "details" array; a "doc.details" path would
    // instead create a new embedded "doc" field and leave "details" untouched.
    "update": { "$set": { "details": doc.details } }
  }});

  // Send the queued updates in batches of 500.
  if ( ops.length >= 500 ) {
    db.test.bulkWrite(ops);
    ops = [];
  }
});

// Send whatever is left over from the last partial batch.
if ( ops.length > 0 ) {
  db.test.bulkWrite(ops);
  ops = [];
}

Output Form

{
    "_id" : ObjectId("58e574a768afb6085ec3a388"),
    "details" : [
        {
          "_id" : ObjectId("58e55f0f68afb6085ec3a2cc"),
          "aUnit" : "08",
          "aSize" : "5",
          "aPos" : "Far",
          "bUnit" : "08",
          "bSize" : "5",
          "bPos" : "Far",
          "cUnit" : "08",
          "cSize" : "3",
          "cPos" : "Far",
          "dUnit" : "08",
          "dSize" : "5",
          "dPos" : "Far"
        }
    ]
}

Original Data

{
    "_id" : ObjectId("58e574a768afb6085ec3a388"),
    "tests" : [
      {
        "_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
        "details" : [
          {
            "a" : [
              {
                "unit" : "08",
                "size" : "5",
                "pos" : "Far",
                "_id" : ObjectId("58e542fb68afb6085ec3a1d6")
              }
            ]
          },
          {
            "b" : [
              {
                "pos" : "Drive Side Far",
                "size" : "5",
                "unit" : "08",
                "_id" : ObjectId("58e542fb68afb6085ec3a1d3")
              }
            ]
          },
          {
            "c" : [
              {
                "pos" : "Far",
                "size" : "3",
                "unit" : "08",
                "_id" : ObjectId("58e542fb68afb6085ec3a1d4")
              }
            ]
          },
          {
            "d" : [
              {
                "pos" : "Far",
                "size" : "5",
                "unit" : "08",
                "_id" : ObjectId("58e542fb68afb6085ec3a1d5")
              }
            ]
          }
        ]
      }
    ]
}

Original Answer

If you are trying to "update" your data, then it's a lot more involved than what you are trying. You have several arrays and you need to actually "traverse" the array elements rather than trying to access them directly.

Here's just a sample to "print out" the "flattened" data:

// Dry run: hoist the fields of every tests[].details[] entry onto the
// enclosing test document and print the result. Nothing is saved.
db.test.find().forEach(doc => {
  // "unit" -> "Unit", so that "a" + "Unit" gives "aUnit".
  const capitalize = word => word.charAt(0).toUpperCase() + word.substr(1);

  doc.tests = doc.tests.map(test => {
    test.details.forEach(detail => {
      Object.keys(detail).forEach(group => {
        detail[group].forEach(item => {
          Object.keys(item)
            .filter(field => field !== '_id')
            .forEach(field => {
              test[group + capitalize(field)] = item[field];
            });
        });
      });
    });
    // The nested "details" array is removed once its fields are on the test.
    delete test.details;
    return test;
  });

  printjson(doc);
})

Which I believe gives the structure you are looking for:

{
    "_id" : ObjectId("58e574a768afb6085ec3a388"),
    "tests" : [
        {
            "_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
            "aUnit" : "08",
            "aSize" : "5",
            "aPos" : "Far",
            "bPos" : "Drive Side Far",
            "bSize" : "5",
            "bUnit" : "08",
            "cPos" : "Far",
            "cSize" : "3",
            "cUnit" : "08",
            "dPos" : "Far",
            "dSize" : "5",
            "dUnit" : "08"
        }
    ]

}

Now I'm not taking into account any possibility that inside your "details" array the documents with keys like "a" etc could maybe appear multiple times. So I am just considering that there is only ever 1 document inside there which has an "a" or a "b" etc, and the last found value matching that key is always assigned when adding the new keys to the top level of the "details" documents.

If your actual case varies, then you would need to modify various .forEach() loops inside there to also use the "index" as a parameter and include that index value as part of the key name. i.e:

"a0Unit": "08",
"a0Size": "05",
"a1Unit": "09",
"a1Size": "06"

But that is a detail you will have to work out if necessary since this would differ from how the data is presented in the question.

If however this is a perfect fit for what you want to update to, then simply run the loop with .bulkWrite() statements executing at regular intervals:

// Flatten every tests[].details[] entry onto its test document and write the
// result back with bulkWrite(), 500 updates at a time.
let ops = [];

// Copies each non-_id field of every details item onto `test` under a
// "<group><Field>" key (e.g. "aUnit"), then removes test.details.
function flattenTest(test) {
  test.details.forEach(detail => {
    Object.keys(detail).forEach(group => {
      detail[group].forEach(item => {
        Object.keys(item)
          .filter(field => field !== '_id')
          .forEach(field => {
            const suffix = field.charAt(0).toUpperCase() + field.substr(1);
            test[group + suffix] = item[field];
          });
      });
    });
  });
  delete test.details;
  return test;
}

db.test.find().forEach(doc => {
  doc.tests = doc.tests.map(test => flattenTest(test));

  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": { "$set": { "tests": doc.tests } }
    }
  });

  // Flush a full batch before queuing more.
  if ( ops.length >= 500 ) {
    db.test.bulkWrite(ops);
    ops = [];
  }
});

// Flush the final partial batch, if any.
if ( ops.length > 0 ) {
  db.test.bulkWrite(ops);
  ops = [];
}

It also appears from the _id fields present in each array member document that you are using mongoose. So whatever you do, do not try and run the code using mongoose itself. It's a "one off" bulk update of your data and should be run directly from the shell. Then of course you will need to modify your schema to suit the new structure.

But this is why you should run through your data in the shell with the printjson() method first.




回答2:


The following

// Flattens only the "a" branch: unwind down to each details.a item, then
// regroup per document, pushing the renamed fields into "tests".
db.collection.aggregate([
    { $unwind: "$tests" },
    { $unwind: "$tests.details" },
    { $unwind: "$tests.details.a" },
    {
        $group: {
            _id: "$_id",
            "tests": {
                "$push": {
                    "aPos": "$tests.details.a.pos",
                    "aSize": "$tests.details.a.size",
                    "aUnit": "$tests.details.a.unit"
                }
            }
        }
    }
])

produces:

{ "_id" : ObjectId("58e574a768afb6085ec3a388"), "tests" : [ { "aPos" : "Far", "aSize" : "5", "aUnit" : "08" } ] }

The above only yielded one set of field:value pairs; doing multiple $unwind at the same level did not work:

// Same pipeline as above with an extra $unwind on "b".
// NOTE(review): in the sample data each "details" element carries only one of
// "a"/"b"/"c"/"d", so after unwinding "a" no remaining document has a "b"
// array to unwind — presumably why this produces no result; confirm.
db.collection.aggregate(
    [{$unwind:"$tests"},
    {$unwind:"$tests.details"},
    {$unwind:"$tests.details.a"},
    {$unwind:"$tests.details.b"},
    {$group:{
        _id:"$_id",
        "tests": {"$push":{
            "aPos":"$tests.details.a.pos",
            "aSize":"$tests.details.a.size",
            "aUnit":"$tests.details.a.unit",
            "bPos":"$tests.details.b.pos",
            "bSize":"$tests.details.b.size",
            "bUnit":"$tests.details.b.unit"
        }}}},
    ])  //does not run

Therefore, there needs to be another aggregation stage of $facet to carry out similar steps for details.b, details.c and details.d.



来源:https://stackoverflow.com/questions/44358595/flattening-very-deeply-nested-mongodb-data

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!