问题
I have an existing deeply nested mongoDB schema that I must flatten as I have a complex query that cannot be efficiently made with the current structure. Here is the MWE of the schema:
// Minimal sample document: a single "details" entry holding four one-element
// arrays ("a" through "d"), each with unit/size/pos values and its own _id.
db.test.insert({
  "_id": ObjectId("58e574a768afb6085ec3a388"),
  "details": [
    {
      "_id": ObjectId("58e55f0f68afb6085ec3a2cc"),
      "a": [
        { "unit": "08", "size": "5", "pos": "Far", "_id": ObjectId("58e55f0f68afb6085ec3a2d0") }
      ],
      "b": [
        { "unit": "08", "size": "5", "pos": "Far", "_id": ObjectId("58e55f0f68afb6085ec3a2cd") }
      ],
      "c": [
        { "unit": "08", "size": "3", "pos": "Far", "_id": ObjectId("58e55f0f68afb6085ec3a2ce") }
      ],
      "d": [
        { "unit": "08", "size": "5", "pos": "Far", "_id": ObjectId("58e55f0f68afb6085ec3a2cf") }
      ]
    }
  ]
})
I want to flatten out the schema. The desired result is this:
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"tests" : [
{
"_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
"aUnit" : "08",
"aSize" : "5",
"aPos" : "Far",
"bPos" : "Far",
"bSize" : "5",
"bUnit" : "08",
"cPos" : "Far",
"cSize" : "3",
"cUnit" : "08",
"dPos" : "Far",
"dSize" : "5",
"dUnit" : "08"
}
]
I'm willing to do each entry type, one at a time, and I thought I had a method to do so but it is not working. Here is what I tried:
// Asker's attempt, kept exactly as posted (reported as changing nothing).
// NOTE(review): the sample document inserted above has no "tests" field, so
// this $exists filter presumably matches no documents and the callback below
// never runs -- confirm against the real collection.
db.test.find({"tests.$.details.a.unit":{$exists:true}}).forEach(function(doc) {
// NOTE(review): `tests` is used here as a bare identifier, not as `doc.tests`;
// no such variable is declared anywhere in this snippet.
doc.tests = {aUnit:tests.details.a.unit};
delete tests.details.a.unit;
// Writes the (modified) document back to the collection.
db.test.save(doc);
});
However, this changes nothing. How can I improve my query in order to flatten my schema?
EDITED: I realized that the MWE had a minor error as compared to the one I intended to use it on. I was closing each entry. For example, "a" : [{ ... }], was incorrectly written as {"a" : [{ ... }]},. However, it is now updated.
回答1:
New Response
Print the data
// Dry run: print every document with each "details" entry flattened.
// Nothing is written back to the collection.
//
// For each entry in doc.details, every array-valued field (e.g. "a") is
// replaced by prefixed fields built from its items ("aUnit", "aSize", ...).
// If a group array holds several items, the last item's values win.
db.test.find().forEach(doc => {
  // Documents without a "details" array have nothing to flatten; print as-is.
  if (!Array.isArray(doc.details)) {
    printjson(doc);
    return;
  }

  doc.details = doc.details.map(detail => {
    Object.keys(detail)
      // Only array-valued fields are nested groups. This skips "_id" and any
      // scalar field (such as one that was already flattened) instead of
      // throwing when .forEach is called on a non-array value.
      .filter(k => k !== "_id" && Array.isArray(detail[k]))
      .forEach(k => {
        detail[k].forEach(item => {
          Object.keys(item).filter(i => i !== "_id").forEach(inner => {
            // Group "a" + field "unit" -> "aUnit".
            detail[k + inner.charAt(0).toUpperCase() + inner.slice(1)]
              = item[inner];
          });
        });
        // The nested group has been copied up, so drop the original array.
        delete detail[k];
      });
    return detail;
  });

  printjson(doc);
});
Update the data
// Flatten every "details" entry and write the result back with bulkWrite(),
// sending the queued operations in batches of 500.
//
// For each entry in doc.details, every array-valued field (e.g. "a") is
// replaced by prefixed fields built from its items ("aUnit", "aSize", ...).
// If a group array holds several items, the last item's values win.

// Pending updateOne operations. Must be declared before the loop reads it.
let ops = [];

db.test.find().forEach(doc => {
  // Documents without a "details" array have nothing to flatten.
  if (!Array.isArray(doc.details)) {
    return;
  }

  doc.details = doc.details.map(detail => {
    Object.keys(detail)
      // Only array-valued fields are nested groups. This skips "_id" and any
      // scalar field, so re-running the script over documents that were
      // already flattened does not throw.
      .filter(k => k !== "_id" && Array.isArray(detail[k]))
      .forEach(k => {
        detail[k].forEach(item => {
          Object.keys(item).filter(i => i !== "_id").forEach(inner => {
            // Group "a" + field "unit" -> "aUnit".
            detail[k + inner.charAt(0).toUpperCase() + inner.slice(1)]
              = item[inner];
          });
        });
        // The nested group has been copied up, so drop the original array.
        delete detail[k];
      });
    return detail;
  });

  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      // The $set path is relative to the stored document, so the target is
      // "details" itself; "doc.details" would create a new nested field.
      "update": { "$set": { "details": doc.details } }
    }
  });

  if ( ops.length >= 500 ) {
    db.test.bulkWrite(ops);
    ops = [];
  }
});

// Flush whatever is left over from the final partial batch.
if ( ops.length > 0 ) {
  db.test.bulkWrite(ops);
  ops = [];
}
Output Form
{
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"details" : [
{
"_id" : ObjectId("58e55f0f68afb6085ec3a2cc"),
"aUnit" : "08",
"aSize" : "5",
"aPos" : "Far",
"bUnit" : "08",
"bSize" : "5",
"bPos" : "Far",
"cUnit" : "08",
"cSize" : "3",
"cPos" : "Far",
"dUnit" : "08",
"dSize" : "5",
"dPos" : "Far"
}
]
}
Original Data
{
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"tests" : [
{
"_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
"details" : [
{
"a" : [
{
"unit" : "08",
"size" : "5",
"pos" : "Far",
"_id" : ObjectId("58e542fb68afb6085ec3a1d6")
}
]
},
{
"b" : [
{
"pos" : "Drive Side Far",
"size" : "5",
"unit" : "08",
"_id" : ObjectId("58e542fb68afb6085ec3a1d3")
}
]
},
{
"c" : [
{
"pos" : "Far",
"size" : "3",
"unit" : "08",
"_id" : ObjectId("58e542fb68afb6085ec3a1d4")
}
]
},
{
"d" : [
{
"pos" : "Far",
"size" : "5",
"unit" : "08",
"_id" : ObjectId("58e542fb68afb6085ec3a1d5")
}
]
}
]
}
]
}
Original Answer
If you are trying to "update" your data, then it's a lot more involved than what you are attempting. You have several arrays and you need to actually "traverse" the array elements rather than trying to access them directly.
Here's just a sample to "print out" the "flattened" data:
// Dry run: for every document, fold each tests[].details[] group into
// prefixed fields on the test itself, drop "details", and print the result.
// Nothing is written back to the collection.
db.test.find().forEach(function (doc) {
  // Copies every field of `item` except "_id" onto `target`, naming it
  // <group><Field> (e.g. group "a" + field "unit" -> "aUnit").
  const liftFields = function (target, group, item) {
    Object.keys(item).forEach(function (field) {
      if ( field === '_id' ) {
        return;
      }
      const flatName = group + field.charAt(0).toUpperCase() + field.substr(1);
      target[flatName] = item[field];
    });
  };

  doc.tests = doc.tests.map(function (test) {
    test.details.forEach(function (detail) {
      Object.keys(detail).forEach(function (group) {
        detail[group].forEach(function (item) {
          liftFields(test, group, item);
        });
      });
    });
    delete test.details;
    return test;
  });

  printjson(doc);
})
Which I believe gives the structure you are looking for:
{
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"tests" : [
{
"_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
"aUnit" : "08",
"aSize" : "5",
"aPos" : "Far",
"bPos" : "Drive Side Far",
"bSize" : "5",
"bUnit" : "08",
"cPos" : "Far",
"cSize" : "3",
"cUnit" : "08",
"dPos" : "Far",
"dSize" : "5",
"dUnit" : "08"
}
]
}
Now I'm not taking into account any possibility that inside your "details" array the documents with keys like "a" etc could maybe appear multiple times. So I am just considering that there is only ever 1 document inside there which has an "a" or a "b" etc, and the last found value matching that key is always assigned when adding the new keys to the top level of the "details" documents.
If your actual case varies, then you would need to modify various .forEach() loops inside there to also use the "index" as a parameter and include that index value as part of the key name. i.e:
"a0Unit": "08",
"a0Size": "05",
"a1Unit": "09",
"a1Size": "06"
But that is a detail you will have to work out if necessary since this would differ from how the data is presented in the question.
If however this is a perfect fit for what you want to update to, then simply run the loop with .bulkWrite() statements executing at regular intervals:
// Queue of pending updateOne operations, sent to the server 500 at a time.
let ops = [];

db.test.find().forEach(function (doc) {
  // Fold every tests[].details[] group into prefixed fields on the test
  // (group "a" + field "unit" -> "aUnit"), then drop "details".
  doc.tests = doc.tests.map(function (test) {
    test.details.forEach(function (detail) {
      Object.keys(detail).forEach(function (group) {
        detail[group].forEach(function (item) {
          Object.keys(item)
            .filter(function (field) { return field !== '_id'; })
            .forEach(function (field) {
              const flatName = group + field.charAt(0).toUpperCase() + field.substr(1);
              test[flatName] = item[field];
            });
        });
      });
    });
    delete test.details;
    return test;
  });

  // Replace the whole "tests" array of this document with the flattened one.
  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": { "$set": { "tests": doc.tests } }
    }
  });

  if ( ops.length >= 500 ) {
    db.test.bulkWrite(ops);
    ops = [];
  }
});

// Send the final partial batch, if any.
if ( ops.length > 0 ) {
  db.test.bulkWrite(ops);
  ops = [];
}
It also appears from the _id fields present in each array member document that you are using mongoose. So whatever you do, do not try and run the code using mongoose itself. It's a "one off" bulk update of your data and should be run directly from the shell. Then of course you will need to modify your schema to suit the new structure.
But this is why you should run through your data in the shell with the printjson() method first.
回答2:
The following
// Flatten only the "a" group: unwind down to each tests.details.a element,
// then regroup per document and push the prefixed fields into "tests".
db.collection.aggregate([
  { $unwind: "$tests" },
  { $unwind: "$tests.details" },
  { $unwind: "$tests.details.a" },
  {
    $group: {
      _id: "$_id",
      "tests": {
        "$push": {
          "aPos": "$tests.details.a.pos",
          "aSize": "$tests.details.a.size",
          "aUnit": "$tests.details.a.unit"
        }
      }
    }
  }
])
produces:
{ "_id" : ObjectId("58e574a768afb6085ec3a388"), "tests" : [ { "aPos" : "Far", "aSize" : "5", "aUnit" : "08" } ] }
The above only yielded one set of field:value pairs; doing multiple $unwind at the same level did not work:
// Attempt to flatten groups "a" and "b" in one pipeline; kept as posted.
// The author reports that this does not work.
// NOTE(review): in the "Original Data" sample each details element holds only
// one of a/b/c/d, so once details.a has been unwound the following $unwind on
// details.b presumably finds no such field and drops every document --
// confirm against the $unwind documentation.
db.collection.aggregate(
[{$unwind:"$tests"},
{$unwind:"$tests.details"},
{$unwind:"$tests.details.a"},
{$unwind:"$tests.details.b"},
{$group:{
_id:"$_id",
"tests": {"$push":{
"aPos":"$tests.details.a.pos",
"aSize":"$tests.details.a.size",
"aUnit":"$tests.details.a.unit",
"bPos":"$tests.details.b.pos",
"bSize":"$tests.details.b.size",
"bUnit":"$tests.details.b.unit"
}}}},
]) //does not run
Therefore, there needs to be another aggregation stage of $facet to carry out similar steps for details.b, details.c and details.d.
来源:https://stackoverflow.com/questions/44358595/flattening-very-deeply-nested-mongodb-data