Grouping/counting sub-documents while preserving root fields

问题

In mongodb, after a few $match and $project, I get the following 2 documents. I'm trying to figure out how to group/count together the list of states for each team in each group of each event. In short, I need to know how many teams are in each state (0, 1 or 2). I start out with the following documents.

{ 
    "_id" : "event1", 
    "groups" : [
        {
            "_id" : "group1", 
            "wlActive" : true, 
            "teams" : [
                {"state" : NumberInt(2)}, 
                {"state" : NumberInt(2)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(0)}, 
                {"state" : NumberInt(0)} 
            ]
        }, 
        {
            "_id" : "group2", 
            "wlActive" : false, 
            "teams" : [
                {"state" : NumberInt(2)}, 
                {"state" : NumberInt(2)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(0)}, 
                {"state" : NumberInt(0)} 
            ]
        }
    ]
},
{ 
    "_id" : "event2", 
    "groups" : [
        {
            "_id" : "group3", 
            "wlActive" : true, 
            "teams" : [
                {"state" : NumberInt(2)}, 
                {"state" : NumberInt(2)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(0)}, 
                {"state" : NumberInt(0)} 
            ]
        }, 
        {
            "_id" : "group4",
            "wlActive" : false, 
            "teams" : [
                {"state" : NumberInt(2)}, 
                {"state" : NumberInt(2)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(1)}, 
                {"state" : NumberInt(0)}, 
                {"state" : NumberInt(0)} 
            ]
        }
    ]
}

What I'm hoping to end up with would be something like this:

{ 
    "_id" : "event1", 
    "groups" : [
        {
            "_id" : "group1", 
            "wlActive" : true, 
            "states":[
                {"state":NumberInt(2), count:2},
                {"state":NumberInt(1), count:3},
                {"state":NumberInt(0), count:2}
            ]
        }, 
        {
            "_id" : "group2", 
            "wlActive" : false, 
            "states":[
                {"state":NumberInt(2), count:2},
                {"state":NumberInt(1), count:3},
                {"state":NumberInt(0), count:2}
            ]
        }
    ]
},
{ 
    "_id" : "event2", 
    "groups" : [
        {
            "_id" : "group3", 
            "wlActive" : true, 
            "states":[
                {"state":NumberInt(2), count:2},
                {"state":NumberInt(1), count:3},
                {"state":NumberInt(0), count:2}
            ]
        }, 
        {
            "_id" : "group4",
            "wlActive" : false, 
            "states":[
                {"state":NumberInt(2), count:2},
                {"state":NumberInt(1), count:3},
                {"state":NumberInt(0), count:2}
            ]
        }
    ]
}

It does not need to be exactly this but as long as I can get a count of each team state and also preserve fields such as "wlActive" for each group. I've seen similar examples on here but I just can't seem to get this one solved.

回答1:

You can actually just do this with a $addFields or $project

// Replace each group's "teams" array with a list of { state, count } totals,
// computed inline per document with $map + $reduce (no $unwind / $group).
db.collection.aggregate([
  { "$addFields": {
    "groups": {
      // Visit every element of the "groups" array.
      "$map": {
        "input": "$groups",
        "in": {
          // Start from the group as-is ("$$this") and overwrite only "teams".
          "$mergeObjects": [
            "$$this",
            { "teams": {
              // Fold the group's teams into an array of per-state totals.
              // Inside "in", "$$value" is the totals built so far and
              // "$$this" is the team currently being visited.
              "$reduce": {
                "input": "$$this.teams",
                "initialValue": [ ],
                "in": {
                  "$cond": {
                    // Has this team's state already been recorded in the totals?
                    "if": { 
                      "$ne": [ { "$indexOfArray":  ["$$value.state", "$$this.state"] }, -1 ]
                    },
                    // Yes: drop the existing entry for this state and append a
                    // replacement whose count is the previous count plus one.
                    "then": {
                      "$concatArrays": [
                        { "$filter": {
                          "input": "$$value",
                          "as": "v",
                          "cond": { "$ne": [ "$$v.state", "$$this.state" ]  }
                        }},
                        [{
                          "state": "$$this.state",
                          // Previous count is read at the index where the state was found.
                          "count": { "$sum": [
                            { "$arrayElemAt": [
                              "$$value.count",
                              { "$indexOfArray": ["$$value.state", "$$this.state" ] }
                            ]},
                            1
                          ]}
                        }]
                      ]
                    },
                    // No: first time this state is seen, append it with a count of 1.
                    "else": {
                      "$concatArrays": [
                        "$$value",
                        [{ "state": "$$this.state", "count": 1 }]
                      ]
                    }
                  }
                }
              }
            }}
          ]
        }
      }
    }
  }}
])

That's pretty complex and basically using $reduce "inline" as a substitute for the $group pipeline operator.

The $reduce is the main part of the work as it iterates each array item "reducing" to another array with the "grouped" totals on keys. It does this by looking for the value of state within the current reduced result via $indexOfArray. When something is not found ( -1 returned ) it appends to the current result via $concatArrays with a new state and count of 1. This is the else case.

When something is found (the then case), we remove the matched element from the result array via $filter and append a replacement element for the same state. The replacement's count is obtained by locating the matched element with $indexOfArray, extracting its current count with $arrayElemAt, and adding 1 to it with $sum.

Of course you can traditionally just do that with $unwind and $group statements:

// Same per-state totals, built the traditional way: flatten the nested arrays
// with $unwind, count with $group, then rebuild the original document shape.
db.collection.aggregate([
  // Flatten to one document per group, then one document per team.
  { "$unwind": "$groups" },
  { "$unwind": "$groups.teams" },
  // Count teams per (document, group, state). "wlActive" is carried in the
  // key so it is still available when the group is rebuilt below.
  { "$group": {
    "_id": {
      "_id": "$_id",
      "gId": "$groups._id",
      "wlActive": "$groups.wlActive",
      "state": "$groups.teams.state"
    },
    "count": { "$sum": 1 }
  }},
  // Order by the compound key, then by count, both descending.
  { "$sort": { "_id": -1, "count": -1 } },
  // Drop "state" from the key so the per-state counts collapse into a
  // "teams" array for each group.
  { "$group": {
    "_id": {
      "_id": "$_id._id",
      "gId": "$_id.gId",
      "wlActive": "$_id.wlActive",
    },
    "teams": { "$push": { "state": "$_id.state", "count": "$count" } }
  }},
  // Group on the original document _id and push each group back into "groups".
  { "$group": {
    "_id": "$_id._id",
    "groups": {
      "$push": {
        "_id": "$_id.gId",
        "wlActive": "$_id.wlActive",
        "teams": "$teams"
      }
    }
  }}
])

Here $unwind is used to "flatten" the array content into separate documents. You do this down to the teams level and $group on the compound key which identifies uniqueness down to the state level.

Since all the document detail is part of the initial $group key, the next $group simply drops state from that key, removing one level of "uniqueness", so the per-state counts are collected into a teams array using $push. In order to get back to the original document form, another $group is done on the original _id value for the documents and the $push reconstructs the groups array.

That form is probably "easier" to comprehend, however it does take considerably longer to run and takes more resources. The first form is optimal since you don't actually need to $group within an existing document, and you generally should avoid $unwind unless absolutely necessary, i.e. grouping state across all documents would require it, but grouping within a single document does not.

Either way basically returns the same result:

{
        "_id" : "event1",
        "groups" : [
                {
                        "_id" : "group1",
                        "wlActive" : true,
                        "teams" : [
                                {
                                        "state" : 2,
                                        "count" : 2
                                },
                                {
                                        "state" : 1,
                                        "count" : 3
                                },
                                {
                                        "state" : 0,
                                        "count" : 2
                                }
                        ]
                },
                {
                        "_id" : "group2",
                        "wlActive" : false,
                        "teams" : [
                                {
                                        "state" : 2,
                                        "count" : 2
                                },
                                {
                                        "state" : 1,
                                        "count" : 3
                                },
                                {
                                        "state" : 0,
                                        "count" : 2
                                }
                        ]
                }
        ]
}
{
        "_id" : "event2",
        "groups" : [
                {
                        "_id" : "group3",
                        "wlActive" : true,
                        "teams" : [
                                {
                                        "state" : 2,
                                        "count" : 2
                                },
                                {
                                        "state" : 1,
                                        "count" : 3
                                },
                                {
                                        "state" : 0,
                                        "count" : 2
                                }
                        ]
                },
                {
                        "_id" : "group4",
                        "wlActive" : false,
                        "teams" : [
                                {
                                        "state" : 2,
                                        "count" : 2
                                },
                                {
                                        "state" : 1,
                                        "count" : 3
                                },
                                {
                                        "state" : 0,
                                        "count" : 2
                                }
                        ]
                }
        ]
}

For what it's worth, since this is not really "aggregating" anything across documents you can just as simply return all the data and "aggregate" the array items within client side code.

As a mongo shell example:

/**
 * Tally how many teams are in each state.
 *
 * @param {Array<{state: number}>} teams - the "teams" array of one group.
 * @returns {Array<{state: number, count: number}>} one entry per distinct
 *   state, ordered by state, highest first.
 */
function countTeamStates(teams) {
  // A single accumulator is updated in place; copying it for every team
  // would give the same totals while doing needless extra work.
  const tally = {};
  teams.forEach(team => {
    const seen = Object.prototype.hasOwnProperty.call(tally, team.state);
    tally[team.state] = (seen ? tally[team.state] : 0) + 1;
  });

  // Object keys are always strings, so turn each one back into an integer.
  // The radix is passed explicitly so the key is always read as decimal.
  return Object.keys(tally)
    .map(key => ({ state: parseInt(key, 10), count: tally[key] }))
    .sort((a, b) => b.state - a.state);
}

// Client-side equivalent of the aggregation pipelines: fetch every document
// and swap each group's "teams" array for its per-state totals.
// Object.assign({}, source, ...) already copies every existing field
// (_id, wlActive, ...), so only the fields being replaced are listed.
db.collection.find().map(doc => Object.assign({}, doc, {
  groups: doc.groups.map(group => Object.assign({}, group, {
    teams: countTeamStates(group.teams)
  }))
}))

来源：https://stackoverflow.com/questions/54992837/grouping-counting-sub-documents-while-preserving-root-fields

标签

mongodb

mongodb-query

aggregation-framework