mongo $sum compounded when doing $unwind and then $group on multiple fields

随声附和 提交于 2019-12-23 10:07:03

问题


I have the following document structure

{
    "app_id": "DHJFK67JDSJjdasj909",
    "date": ISODate("2014-08-07T00:00:00.000Z"),
    "event_count": 100,
    "events": [
        { "type": 0,  "value": 12  },
        { "type": 10, "value": 24 },
        { "type": 20, "value": 36  },
        { "type": 30, "value": 43 }
    ],
    "unique_events": [
        { "type": 0,  "value": 5  },
        { "type": 10, "value": 8 },
        { "type": 20, "value": 12  },
        { "type": 30, "value": 56 }
    ]
}

I am trying to get a sum of event_counts and also the values for unique_events and events per type. This is the kind of output I am expecting where the event_count and each events and unique_events value has been summed per type.

{
    "app_id": "DHJFK67JDSJjdasj909",
    "date": ISODate("2014-08-07T00:00:00.000Z"),
    "event_count": 4345,
    "events": [
        { "type": 0,  "value": 624  },
        { "type": 10, "value": 234 },
        { "type": 20, "value": 353 },
        { "type": 30, "value": 472 }
    ],
    "unique_events": [
        { "type": 0,  "value": 433  },
        { "type": 10, "value": 554 },
        { "type": 20, "value": 645  },
        { "type": 30, "value": 732 }
    ]
}

This is my query

db.events.aggregate([
    { "$unwind": "$events" },
    { "$group": {
        "_id": { 
            "app_id": "$app_id",
            "type": "$events.type"
            "unique_type": "$unique_events.type"
        },
        "event_count": { "$sum": "$event_count" },
        "event_value": { "$sum": "$events.value" },
        "unique_event_value": { "$sum": "$unique_events.value" }
    }},
    { "$group": {
        "_id": "$_id.app_id",
        "event_count": { "$sum": "$event_count" },
        "events": { "$push": { "type": "$_id.type", "value": "$event_value" } }
        "unique_events": { "$push": { "type": "$_id.unique_type", "value": "$unique_event_value" } }
    }}
]) 

The problem is that using two $unwinds and then grouping by both events and unique_events is causing the $sum to be compounded and much too large. Is there some way I can fix this using mongo or do I have to run two queries and then merge the two result sets in code.

Thanks

Irfan


回答1:


This is simple enough really, to sum the results for each array it's just a matter of discerning between which is which and "combining the elements". In short you should probably be doing this in you documents anyway as should be evident from the first pipeline stage(s).

So for MongoDB 2.6 and greater there are some helper methods:

db.events.aggregate([
    { "$project": {
        "app_id": 1,
        "event_count": 1,
        "all_events": {
            "$setUnion": [
                { "$map": {
                    "input": "$events",
                    "as": "el",
                    "in": {
                        "type": "$$el.type",
                        "value": "$$el.value",
                        "class": { "$literal": "A" }
                    }
                }},
                { "$map": {
                    "input": "$unique_events",
                    "as": "el",
                    "in": {
                        "type": "$$el.type",
                        "value": "$$el.value",
                        "class": { "$literal": "B" }
                    }
                }}
            ]
        }
    }},
    { "$unwind": "$all_events" },
    { "$group": {
        "_id": {
            "app_id": "$app_id",
            "class": "$all_events.class",
            "type": "$all_events.type"
        },
        "event_count": { "$sum": "$event_count" },
        "value": { "$sum": "$all_events.value" }
    }},
    { "$group": {
        "_id": "$_id.app_id",
        "event_count": { "$sum": "$event_count" },
        "events": {
            "$push": {
                "$cond": [
                    { "$eq": [ "$_id.class", "A" ] },
                    { "type": "$_id.type", "value": "$value" },
                    false
                ]
            }
        },
        "unique_events": {
            "$push": {
                "$cond": [
                    { "$eq": [ "$_id.class", "B" ] },
                    { "type": "$_id.type", "value": "$value" },
                    false
                ]
            }
        }
    }},
    { "$project": {
        "event_count": 1,
        "events": { "$setDifference": [ "$events", [false] ] },
        "unique_events": {
            "$setDifference": [ "$unique_events", [false] ]
        }
    }}
])

Mostly in the $setUnion and $setDifference operators. The other ccase is $map, which processes arrays in place. The whole thing there is doing operations on arrays without the use of $unwind. But those can of course be done in prior versions, it just takes a bit more work:

db.events.aggregate([
    { "$unwind": "$events" },
    { "$group": {
        "_id": "$_id",
        "app_id": { "$first": "$app_id" },
        "event_count": { "$first": "$event_count" },
        "events": {
            "$push": {
                "type": "$events.type",
                "value": "$events.value",
                "class": { "$const": "A" }
            }
        },
        "unique_events": { "$first": "$unique_events" }            
    }},
    { "$unwind": "$unique_events" },
    { "$group": {
        "_id": "$_id",
        "app_id": { "$first": "$app_id" },
        "event_count": { "$first": "$event_count" },
        "events": { "$first": "$events" },
        "unique_events": {
            "$push": {
                "type": "$unique_events.type",
                "value": "$unique_events.value",
                "class": { "$const": "B" }
            }
        }
    }},
    { "$project": {
        "app_id": 1,
        "event_count": 1,
        "events": 1,
        "unique_events": 1,
        "type": { "$const": [ "A","B" ] }
    }},
    { "$unwind": "$type" },
    { "$unwind": "$events" },
    { "$unwind": "$unique_events" },
    { "$group": {
        "_id": "$_id",
        "app_id": { "$first": "$app_id" },
        "event_count": { "$first": "$event_count" },
        "all_events": {
            "$addToSet": {
                "$cond": [
                     { "$eq": [ "$events.class", "$type" ] },
                     {
                         "type": "$events.type",
                         "value": "$events.value",
                         "class": "$events.class"
                     },
                     {
                         "type": "$unique_events.type",
                         "value": "$unique_events.value",
                         "class": "$unique_events.class"
                     }
                ]
            }
        }
    }},
    { "$unwind": "$all_events" },
   { "$group": {
        "_id": {
            "app_id": "$app_id",
            "class": "$all_events.class",
            "type": "$all_events.type"
        },
        "event_count": { "$sum": "$event_count" },
        "value": { "$sum": "$all_events.value" }
    }},
    { "$group": {
        "_id": "$_id.app_id",
        "event_count": { "$sum": "$event_count" },
        "events": {
            "$push": {
                "$cond": [
                    { "$eq": [ "$_id.class", "A" ] },
                    { "type": "$_id.type", "value": "$value" },
                    false
                ]
            }
        },
        "unique_events": {
            "$push": {
                "$cond": [
                    { "$eq": [ "$_id.class", "B" ] },
                    { "type": "$_id.type", "value": "$value" },
                    false
                ]
            }
        }
    }},
    { "$unwind": "$events" },
    { "$match": { "events": { "$ne": false } } },
    { "$group": {
        "_id": "$_id",
        "event_count": { "$first": "$event_count" },
        "events": { "$push": "$events" },
        "unique_events": { "$first": "$unique_events" }
    }},
    { "$unwind": "$unique_events" },
    { "$match": { "unique_events": { "$ne": false } } },
    { "$group": {
       "_id": "$_id",
        "event_count": { "$first": "$event_count" },
        "events": { "$first": "$events" },
        "unique_events": { "$push": "$unique_events" }
    }}
])

That gets you the results you want with each array being "summed" together as well as the master "event_count" with the correct result.

You probably should consider combining both of those arrays with a similar identifier to what has been used in the pipelines as demonstrated. This part is half of the work. The other half is considering that you probably should store pre-aggregated results in a collection somewhere for the best application performance.




回答2:


You can perform Following Map reduce:
Its not a dynamic solution, i have created variable for each events and unique_events.
I have saved output in different collection using out: "session_stat" in mapReduce Function.

var mapFunction = function() {
                      var key = this.app_id;
                      var value = {                                 
                                    event_count: this.event_count,
                                    events: this.events,
                                    unique_events: this.unique_events
                                   };

                      emit( key, value );
                  };

var reduceFunction = function(key, values) {

                        var reducedObject = {
                                              app_id: key,
                                              events_wise_total: 0,
                                              unique_events_wise_total:0,
                                              total_event_count:0
                                            };

                        var events_wise_total = [];
                        var events_0_total = { type:0, value :0};
                        var events_10_total = { type:10, value :0};
                        var events_20_total = { type:20, value :0};
                        var events_30_total = { type:30, value :0};

                        var unique_events_wise_total = [];
                        var unique_events_0_total = { type:0, value :0};
                        var unique_events_10_total = { type:10, value :0};
                        var unique_events_20_total = { type:20, value :0};
                        var unique_events_30_total = { type:30, value :0};

                        var total_event_count = 0;
                        values.forEach( function(value) {
                                total_event_count += value.event_count;
                                var events = value.events;

                                events.forEach(function(event){
                                                if(event.type == 0){events_0_total.value += event.value;}
                                                if(event.type == 10){events_10_total.value += event.value;}
                                                if(event.type == 20){events_20_total.value += event.value;}
                                                if(event.type == 30){events_30_total.value += event.value;}
                                        });

                                var unique_events = value.unique_events;

                                unique_events.forEach(function(unique_event){
                                                if(unique_event.type == 0){unique_events_0_total.value += unique_event.value;}
                                                if(unique_event.type == 10){unique_events_10_total.value += unique_event.value;}
                                                if(unique_event.type == 20){unique_events_20_total.value += unique_event.value;}
                                                if(unique_event.type == 30){unique_events_30_total.value += unique_event.value;}
                                        }); 
                            }
                          );
                        events_wise_total.push(events_0_total);
                        events_wise_total.push(events_10_total);
                        events_wise_total.push(events_20_total);
                        events_wise_total.push(events_30_total);

                        unique_events_wise_total.push(unique_events_0_total);
                        unique_events_wise_total.push(unique_events_10_total);
                        unique_events_wise_total.push(unique_events_20_total);
                        unique_events_wise_total.push(unique_events_30_total);

                        reducedObject.events_wise_total = events_wise_total;
                        reducedObject.unique_events_wise_total = unique_events_wise_total;
                        reducedObject.total_event_count = total_event_count;

                        return reducedObject;
                     };

var finalizeFunction = function (key, reducedValue) {
                          return reducedValue;
                       };                    

db.GroupBy.mapReduce(
                       mapFunction,
                       reduceFunction,
                       {
                         out: "session_stat",
                         finalize: finalizeFunction
                       });

Hope it may be useful



来源:https://stackoverflow.com/questions/25201157/mongo-sum-compounded-when-doing-unwind-and-then-group-on-multiple-fields

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!