Mongo / Mongoose Aggregation - $redact and $cond issues

末鹿安然 提交于 2019-12-07 16:21:23

问题


I was fortunate enough to get an awesome answer from @chridam to another SO question, "Mongo / Mongoose - Aggregating by Date", which, given a set of documents like:

{ "_id" : ObjectId("5907a5850b459d4fdcdf49ac"), "amount" : -33.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-04-26T23:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.581Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ba"), "amount" : -61.3, "name" : "Amazon", "method" : "VIS", "date" : ISODate("2017-03-23T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.592Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ce"), "amount" : -3.3, "name" : "Tesco", "method" : "VIS", "date" : ISODate("2017-03-15T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.601Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49cc"), "amount" : -26.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-16T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.600Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49f7"), "amount" : -63.3, "name" : "Sky", "method" : "VIS", "date" : ISODate("2017-03-02T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.617Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49be"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-22T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.593Z"), "category" : "Not Set", "__v" : 0 }

required a query that would aggregate the spend by vendor, year, month and week. The query is below, and it almost works fantastically, but now that I have used it in my application I have noticed a significant problem:

// Aggregates the spend of one vendor and reports yearly, monthly and weekly
// spend arrays and totals for every (name, year, month, week) combination that
// survives the $redact filter.
db.statements.aggregate([
  // Stage 1: keep only the statements of a single vendor.
  { "$match": { "name": "RINGGO" } },
  // Stage 2: keep a document only when its date matches every listed
  // year/month/week condition; all other documents are pruned before any
  // grouping takes place.
  {
  "$redact": {
      "$cond": [
          {
              "$and": [
                 { "$eq": [{ "$year": "$date" },  2017  ]}, // within my route this uses parseInt(req.params.year)
                 { "$eq": [{ "$month": "$date" }, 3 ]}, // within my route this uses parseInt(req.params.month)
                 { "$eq": [{ "$week": "$date" },  12  ]} // within my route this uses parseInt(req.params.week)
            ]
        },
        "$$KEEP",
        "$$PRUNE"
    ]
}
},{
    // Stage 3: one group per (name, year, month, week). Amounts whose dates
    // fall in the same week are summed into a single "total" here, so every
    // later stage only sees per-week totals, never the individual amounts.
    "$group": {
        "_id": {
            "name": "$name",
            "year": { "$year": "$date" },
            "month": { "$month": "$date" },
            "week": { "$week": "$date" }
        },
        "total": { "$sum": "$amount" }
    }
},
{
    // Stage 4: regroup per (name, year). YearlySpends collects the per-week
    // totals from stage 3; "data" keeps each incoming group document
    // ($$ROOT) so that it can be unwound again afterwards.
    "$group": {
        "_id": {
            "name": "$_id.name",
            "year": "$_id.year"
        },
        "YearlySpends": { "$push": "$total" },
        "totalYearlyAmount": { "$sum": "$total" },
        "data": { "$push": "$$ROOT" }
    }
},
// Stage 5: back to one document per stage-3 group, now carrying the yearly fields.
{ "$unwind": "$data" },
{
    // Stage 6: regroup per (name, month) - the year is not part of this key.
    // MonthlySpends again collects the per-week totals; the yearly fields
    // are carried over from the first document of each group.
    "$group": {
        "_id": {
            "name": "$_id.name",
            "month": "$data._id.month"
        },
        "YearlySpends": { "$first": "$YearlySpends" },
        "totalYearlyAmount": { "$first": "$totalYearlyAmount" },
        "MonthlySpends": { "$push": "$data.total" },
        "totalMonthlyAmount": { "$sum": "$data.total" },
        "data": { "$push": "$data" }
    }
},
// Stage 7: one document per stage-3 group again, now with yearly and monthly fields.
{ "$unwind": "$data" },
{
    // Stage 8: regroup per (name, week) - neither year nor month is part of
    // this key. WeeklySpends collects the per-week totals; the yearly and
    // monthly fields are carried over from the first document of each group.
    "$group": {
        "_id": {
            "name": "$_id.name",
            "week": "$data._id.week"
        },
        "YearlySpends": { "$first": "$YearlySpends" },
        "totalYearlyAmount": { "$first": "$totalYearlyAmount" },
        "MonthlySpends": { "$first": "$MonthlySpends" },
        "totalMonthlyAmount": { "$first": "$totalMonthlyAmount" },
        "WeeklySpends": { "$push": "$data.total" },
        "totalWeeklyAmount": { "$sum": "$data.total" },
        "data": { "$push": "$data" }
    }
},
// Stage 9: one document per stage-3 group, now with all three sets of fields.
{ "$unwind": "$data" },
{
    // Stage 10: group on the original compound _id from stage 3, giving one
    // output document per (name, year, month, week) with the arrays and
    // totals taken from the first document of each group.
    "$group": {
        "_id": "$data._id",
        "YearlySpends": { "$first": "$YearlySpends" },
        "totalYearlyAmount": { "$first": "$totalYearlyAmount" },
        "MonthlySpends": { "$first": "$MonthlySpends" },
        "totalMonthlyAmount": { "$first": "$totalMonthlyAmount" },
        "WeeklySpends": { "$first": "$WeeklySpends" },
        "totalWeeklyAmount": { "$first": "$totalWeeklyAmount" }
    }
}
])

Running this query returns

{ "_id" :
 { "name" : "RINGGO", 
   "year" : 2017, 
   "month" : 3, 
   "week" : 12 }, 
   "YearlySpends" : [ -9.6 ], 
   "totalYearlyAmount" : -9.6, 
   "MonthlySpends" : [ -9.6 ], 
   "totalMonthlyAmount" : -9.6, 
   "WeeklySpends" : [ -9.6 ], 
   "totalWeeklyAmount" : -9.6 
}

And when I change to wanting to see the month's spending

"$cond": [
          {
            "$and": [
                 { "$eq": [{ "$year": "$date" },  2017  ]},
                 { "$eq": [{ "$month": "$date" }, 3 ]}
            ]
          },
        "$$KEEP",
        "$$PRUNE"
      ]

I get:

{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 12 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -9.6 ], "totalWeeklyAmount" : -9.6 }
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 9 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -3.3 ], "totalWeeklyAmount" : -3.3 }
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 11 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -9.6 ], "totalWeeklyAmount" : -9.6 }
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 13 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -3.3 ], "totalWeeklyAmount" : -3.3 }

However when I run a simple db.statements.find({"name":"RINGGO"}) I get:

{ "_id" : ObjectId("5907a5850b459d4fdcdf49ac"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-26T23:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.581Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ba"), "amount" : -6.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-23T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.592Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ce"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-15T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.601Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49cc"), "amount" : -6.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-16T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.600Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49f7"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-02T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.617Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49be"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-22T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.593Z"), "category" : "Not Set", "__v" : 0 }

So you can see that there is a different number of items in MonthlySpends in previous output compared to that shown in the output from the find by name. Also you can see that some of the values are being summed together in MonthlySpends when they shouldn't be.

Ideally, I'm looking for the following output. When I have $redact containing:

"$cond": [
        {
            "$and": [
                 { "$eq": [{ "$year": "$date" },  2017  ]}, 
                 { "$eq": [{ "$month": "$date" }, 3 ]}, 
                 { "$eq": [{ "$week": "$date" },  12  ]} 
            ]
        },
        "$$KEEP",
        "$$PRUNE"
    ]

returns

{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 12 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -9.6 ], "totalWeeklyAmount" : -9.6 }

when I have $redact containing:

"$cond": [
        {
            "$and": [
                 { "$eq": [{ "$year": "$date" },  2017  ]}, 
                 { "$eq": [{ "$month": "$date" }, 3 ]},
            ]
        },
        "$$KEEP",
        "$$PRUNE"
        ]

returns

{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997 }

when I have $redact containing:

"$cond": [
        {
            "$and": [
                 { "$eq": [{ "$year": "$date" },  2017  ]}
            ]
        },
        "$$KEEP",
        "$$PRUNE"
        ]

returns

{ "_id" : { "name" : "RINGGO", "year" : 2017 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997}

Any help with this would be much appreciated. I've tried tinkering with the query, but I'm afraid I just don't understand it well enough to modify it correctly.

My Mongoose version is ^4.9.5 and my mongo is 3.4.2.


回答1:


You can try $facet with $addFields (both available from MongoDB 3.4) to run the aggregations in parallel.

This reduces the overall complexity, and each grouping runs at the same time on its own matched input.

The code below builds the aggregation pipeline dynamically, based on the request object.

// Sample request
var request = {
  "name": "RINGGO",
  "year": 2017,
  "month": 3,
  "week": 12
};

/**
 * Tell whether a request value was supplied.
 *
 * Any truthy value counts, and so does 0: $week numbers weeks from 0, so a
 * plain truthiness test would silently skip a request for week 0.
 */
function hasValue(value) {
  return value === 0 || Boolean(value);
}

/**
 * Build one $facet sub-pipeline.
 *
 * @param {string} period   Name of the computed date field ("year", "month" or "week").
 * @param {Object} criteria $match document selecting the input of this facet.
 * @returns {Array} Two stages: the $match, then a $group by name and period
 *                  that pushes every amount into "spend" and sums them into "total".
 */
function buildPeriodFacet(period, criteria) {
  var groupId = { "name": "$name" };
  groupId[period] = "$" + period;

  return [
    { "$match": criteria },
    {
      "$group": {
        "_id": groupId,
        "spend": { "$push": "$amount" },
        "total": { "$sum": "$amount" }
      }
    }
  ];
}

/**
 * Build the pieces of the aggregation for a request object.
 *
 * @param {Object} request May carry name, year, month and week.
 * @returns {{match: Object, addFields: Object, facet: Object}}
 *   match     - initial $match document on name
 *   addFields - $addFields document computing year/month/week from "$date",
 *               only for the periods present in the request
 *   facet     - $facet document with a Yearly/Monthly/Weekly sub-pipeline for
 *               each period present in the request
 */
function buildDateAggregation(request) {
  var periods = [
    ["year", "Yearly"],
    ["month", "Monthly"],
    ["week", "Weekly"]
  ];
  var addFields = {};
  var facet = {};
  var yearRequested = hasValue(request["year"]);

  periods.forEach(function (entry) {
    var period = entry[0];
    var facetName = entry[1];

    if (!hasValue(request[period])) {
      return;
    }

    // e.g. addFields["month"] = { "$month": "$date" }
    addFields[period] = {};
    addFields[period]["$" + period] = "$date";

    // Month and week numbers repeat every year, so when a year was requested
    // the Monthly and Weekly facets are restricted to that year as well;
    // otherwise they would pool e.g. March of every year on record.
    var criteria = {};
    if (period !== "year" && yearRequested) {
      criteria["year"] = request["year"];
    }
    criteria[period] = request[period];

    facet[facetName] = buildPeriodFacet(period, criteria);
  });

  return {
    match: { name: request["name"] },
    addFields: addFields,
    facet: facet
  };
}

// Build the initial match document, the computed date fields and the facets

var aggregationParts = buildDateAggregation(request);
var match1 = aggregationParts.match;
var addFields = aggregationParts.addFields;
var facet = aggregationParts.facet;

// Use aggregate builder: match on name, add the computed date fields, then run
// the per-period facets and hand the outcome to the callback.

statements.aggregate()
        .match(match1)
        .append({"$addFields": addFields}) // No addFields stage in mongoose builder
        .facet(facet)
        .exec(function(err, data) {
            if (err) {
                // Report the failure instead of silently dropping it.
                console.error(err);
                return;
            }
            // Consume `data` (the aggregation result) here.
        });

Mongo Shell Query for name/year/month/week criteria.

// Yearly, monthly and weekly spend of one vendor, computed side by side.
// The stages are passed as a single pipeline array.
db.statements.aggregate([{
    // Keep only the statements of a single vendor.
    '$match': {
        name: 'RINGGO'
    }
}, {
    // Compute the date parts once so that every facet can match and group on them.
    '$addFields': {
        year: {
            '$year': '$date'
        },
        month: {
            '$month': '$date'
        },
        week: {
            '$week': '$date'
        }
    }
}, {
    // Each facet filters the same input on its own criteria, then groups by
    // name and period, collecting the individual amounts and their sum.
    '$facet': {
        Yearly: [{
                '$match': {
                    year: 2017
                }
            },
            {
                '$group': {
                    _id: {
                        name: '$name',
                        year: '$year'
                    },
                    spend: {
                        '$push': '$amount'
                    },
                    total: {
                        '$sum': '$amount'
                    }
                }
            }
        ],
        Monthly: [{
                // Month numbers repeat every year, so restrict to the requested year too.
                '$match': {
                    year: 2017,
                    month: 3
                }
            },
            {
                '$group': {
                    _id: {
                        name: '$name',
                        month: '$month'
                    },
                    spend: {
                        '$push': '$amount'
                    },
                    total: {
                        '$sum': '$amount'
                    }
                }
            }
        ],
        Weekly: [{
                // Week numbers repeat every year, so restrict to the requested year too.
                '$match': {
                    year: 2017,
                    week: 12
                }
            },
            {
                '$group': {
                    _id: {
                        name: '$name',
                        week: '$week'
                    },
                    spend: {
                        '$push': '$amount'
                    },
                    total: {
                        '$sum': '$amount'
                    }
                }
            }
        ]
    }
}])

Sample Response

    {
    "Yearly": [{
        "_id": {
            "name": "RINGGO",
            "year": 2017
        },
        "spend": [-3.3, -6.3, -3.3, -6.3, -3.3, -3.3],
        "total": -25.799999999999997
    }],
    "Monthly": [{
        "_id": {
            "name": "RINGGO",
            "month": 3
        },
        "spend": [-3.3, -6.3, -3.3, -6.3, -3.3, -3.3],
        "total": -25.799999999999997
    }],
    "Weekly": [{
        "_id": {
            "name": "RINGGO",
            "week": 12
        },
        "spend": [-6.3, -3.3],
        "total": -9.6
    }]
}

You can run a similar aggregation when only the year and month, or only the year, are supplied.

> So you can see that there is a different number of items in MonthlySpends in previous output compared to that shown in the output from the find by name. Also you can see that some of the values are being summed together in MonthlySpends when they shouldn't be.

This happens in the first $group stage, where grouping by $week rolls the amounts for two of the dates (the 15th and 16th) up into week 11 and the amounts for the other two dates (the 22nd and 23rd) up into week 12. These weekly sums later show up as single, already-summed entries in MonthlySpends.



来源:https://stackoverflow.com/questions/43950745/mongo-mongoose-aggregation-redact-and-cond-issues

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!