问题
I have a collection of 113 documents. It holds data from the departments of a university. There are 3 types of departments:
natural science departments => "chairtype" = "E",
humanities departments => "chairtype" = "G",
creative departments => "chairtype" = "T"
Every document contains fields with the same names:
"mijczjeqeo"
"vmfqvfjptu"
"fwtweaeeba"
...
and so on
113 people fill in these fields from a web form every month; each person fills in exactly 1 document. After the form is filled in, a PHP script builds a history of the values in every document:
"mijczjeqeo" value moves to "versions.0.content.mijczjeqeo"
"vmfqvfjptu" value moves to "versions.0.content.vmfqvfjptu"
"fwtweaeeba" value moves to "versions.0.content.fwtweaeeba"
...
and so on
and before saving last values...
all of "versions.0.content." values move to "versions.1.content."
all of "versions.1.content." values move to "versions.2.content."
all of "versions.2.content." values move to "versions.3.content."
...
and so on
I need to get the average value of each parameter for the last year grouped by chairtype
1 document for example ("Department of Organic Chemistry"):
{
"_id": ObjectId("52b85dfa32b6249513f15897"),
"atkswlntfd": 0,
"auwbsjqzir": 0,
"avqrnjzbgd": 0,
"awquatbduv": 0,
"axdducvoxb": 2.46,
"bkoldugcrp": 4,
"bzccjslewi": 0,
"cclwyezydc": 0,
"chairtype": "E",
"confirmed": "1",
"covfctuuhi": 0,
"dingrnyknr": 0,
"dkfknpzsnt": 0,
"dqetuhllse": 0,
"duorlxiqbw": 5,
"eayoicezsh": 20,
"esrfffruoy": 0,
"ewdunlkxue": 1,
"ewfshjnome": 0,
"exakqiudxg": 5,
"fabdcybqxu": 0,
"fsplxunmaf": 0,
"fubxmogyam": 1,
"fuzqrnwsks": 3,
"fwtweaeeba": 52.31,
"fybnnlojgb": 5,
"gdjheqrqcx": 0,
"gpupstzwjd": 6,
"gxilphzzcu": 0,
"hbahrruokf": 0,
"hbqnleclwp": 2,
"hchpoxxnwz": 0,
"hmorfnbfvf": 0,
"hqatnzynxb": 0,
"hrqssioxdv": 0,
"hvscavwupe": 0,
"hyzlbtnxil": 0,
"idzxqjoxgd": 0,
"ikxsvguboy": 0,
"ipjpwkbqnt": 0,
"izqighabwk": 9000000,
"jncncbplme": 3,
"jxkspszlrc": 1,
"kekarveuhb": 0,
"klyoglzriu": 0,
"kmvuelmdwe": 0,
"knxzfjwnax": 5,
"kqfhjboecc": 0,
"kqhojbwvmo": 0,
"lchogmhynm": 0,
"lmuwyeqvph": 7,
"lvcdbhisbx": 0,
"mijczjeqeo": 8,
"mpxzquzcat": 0,
"mqqoetqued": 0,
"muktdrzphw": 0,
"nceszojuvt": 0,
"nypnjqgxop": 1,
"ojklibfieg": 2,
"padotysmxb": 0,
"parent": "47de3176-bbc3-44e0-8063-8920ac56fdc8",
"pidwyruvfq": 35.08,
"pkeymzxsrj": 0,
"pnjtfvzwiv": 1,
"pqjnpoxmcx": 0,
"pyexnkjujx": 38,
"qfeqdvzssg": 0,
"qidslfqnwn": 0,
"qvjszkahdc": 0,
"qzoriqedoh": 0,
"rjicuyfsmt": 0,
"rqenalbuko": 40,
"rxkwogbxwu": 0,
"sbqqabqukn": 1,
"skhgbmucrp": 0,
"slewjrvgjn": 0,
"tidjarsatz": 0,
"title_ru": "Кафедра органической химии",
"tmbagkmlgb": 0,
"type": "chair",
"uploaded": "1",
"uqcdessbeu": 0,
"url": "http:\/\/www.herzen.spb.ru\/main\/structure\/fukultets\/him\/1208434887\/",
"uuid": "a0a39ace-694c-48c5-841d-8b351e5b91da",
"vacoxpronz": 0,
"vdjfydjrpa": 13,
"versions": {
"0": {
"_id": ObjectId("52dbbc4cfb0a29ce4a8b45bd"),
"content": {
"atkswlntfd": 0,
"auwbsjqzir": 0,
"avqrnjzbgd": 0,
"awquatbduv": 0,
"axdducvoxb": 3,
"bkoldugcrp": 4,
"bzccjslewi": 0,
"cclwyezydc": 0,
"confirmed": null,
"covfctuuhi": 0,
"dingrnyknr": 0,
"dkfknpzsnt": 0,
"dqetuhllse": 2,
"duorlxiqbw": 5,
"eayoicezsh": 20,
"esrfffruoy": 0,
"ewdunlkxue": 3,
"ewfshjnome": 0,
"exakqiudxg": 5,
"fabdcybqxu": 1,
"fsplxunmaf": 0,
"fubxmogyam": 1,
"fuzqrnwsks": 0,
"fwtweaeeba": 55,
"fybnnlojgb": 0,
"gdjheqrqcx": 0,
"gpupstzwjd": 6,
"gxilphzzcu": 0,
"hbahrruokf": 0,
"hbqnleclwp": 3,
"hchpoxxnwz": 1849020,
"hmorfnbfvf": 2,
"hqatnzynxb": 0,
"hrqssioxdv": 0,
"hvscavwupe": 0,
"hyzlbtnxil": 0,
"idzxqjoxgd": 2,
"ikxsvguboy": 1,
"ipjpwkbqnt": 0,
"izqighabwk": 1040000,
"jncncbplme": 1,
"jxkspszlrc": 1,
"kekarveuhb": 0,
"klyoglzriu": 1,
"kmvuelmdwe": 0,
"knxzfjwnax": 5,
"kqfhjboecc": 0,
"kqhojbwvmo": 0,
"lchogmhynm": 2,
"lmuwyeqvph": 7,
"lvcdbhisbx": 0,
"mijczjeqeo": 8,
"mpxzquzcat": 0,
"mqqoetqued": 1,
"muktdrzphw": 0,
"nceszojuvt": 0,
"nypnjqgxop": 11,
"ojklibfieg": 0,
"padotysmxb": 0,
"pidwyruvfq": 34,
"pkeymzxsrj": 0,
"pnjtfvzwiv": 0,
"pqjnpoxmcx": 0,
"pyexnkjujx": 38,
"qfeqdvzssg": 0,
"qidslfqnwn": 0,
"qvjszkahdc": 0,
"qzoriqedoh": 0,
"rjicuyfsmt": 0,
"rqenalbuko": 40,
"rxkwogbxwu": 0,
"sbqqabqukn": 1,
"skhgbmucrp": 0,
"slewjrvgjn": 0,
"tidjarsatz": 0,
"tmbagkmlgb": 1,
"uqcdessbeu": 0,
"vacoxpronz": 0,
"vdjfydjrpa": 11,
"vktxndqyhm": 0,
"vmfqvfjptu": 0,
"vofeebpgsc": 5,
"wewmtafjvk": 1,
"wfqdcsrltv": 0,
"wzmbxalguv": 0,
"xjllpdyool": 0,
"xjxbwynytx": 0,
"xmirihwycl": 0,
"xxrsnjlmfv": 0,
"ybgdjpexth": 0,
"ymkmkuxlrq": 0,
"yneoycjloj": 0,
"yogujlfvpb": 0,
"zahigfmqxb": 0,
"znqqhqkjte": 0,
"zyztudtziu": 3
},
"content_hash": "816090f397962f92f5329fa5bb0a9ec1",
"datetime": ISODate("2014-01-19T11:51:40.590Z"),
"description_ru": "Значение показателей за этап 1",
"label_ru": "Окончание этапа 1"
},
"1": {
"_id": ObjectId("5305372cfb0a2944638b45bc"),
"content": {
"atkswlntfd": 0,
"auwbsjqzir": 0,
"avqrnjzbgd": 1,
"awquatbduv": 0,
"axdducvoxb": 2.46,
"bkoldugcrp": 4,
"bzccjslewi": 1,
"cclwyezydc": 0,
"confirmed": "1",
"covfctuuhi": 0,
"dingrnyknr": 0,
"dkfknpzsnt": 0,
"dqetuhllse": 0,
"duorlxiqbw": 5,
"eayoicezsh": 20,
"esrfffruoy": 0,
"ewdunlkxue": 0,
"ewfshjnome": 0,
"exakqiudxg": 5,
"fabdcybqxu": 1,
"fsplxunmaf": 0,
"fubxmogyam": 1,
"fuzqrnwsks": 0,
"fwtweaeeba": 52.31,
"fybnnlojgb": 0,
"gdjheqrqcx": 0,
"gpupstzwjd": 6,
"gxilphzzcu": 0,
"hbahrruokf": 0,
"hbqnleclwp": 0,
"hchpoxxnwz": 0,
"hmorfnbfvf": 0,
"hqatnzynxb": 0,
"hrqssioxdv": 0,
"hvscavwupe": 0,
"hyzlbtnxil": 0,
"idzxqjoxgd": 0,
"ikxsvguboy": 0,
"ipjpwkbqnt": 0,
"izqighabwk": 0,
"jncncbplme": 0,
"jxkspszlrc": 1,
"kekarveuhb": 1,
"klyoglzriu": 0,
"kmvuelmdwe": 0,
"knxzfjwnax": 5,
"kqfhjboecc": 0,
"kqhojbwvmo": 0,
"lchogmhynm": 0,
"lmuwyeqvph": 7,
"lvcdbhisbx": 0,
"mijczjeqeo": 8,
"mpxzquzcat": 0,
"mqqoetqued": 0,
"muktdrzphw": 0,
"nceszojuvt": 0,
"nypnjqgxop": 0,
"ojklibfieg": 1,
"padotysmxb": 0,
"pidwyruvfq": 34.15,
"pkeymzxsrj": 0,
"pnjtfvzwiv": 0,
"pqjnpoxmcx": 0,
"pyexnkjujx": 38,
"qfeqdvzssg": 0,
"qidslfqnwn": 0,
"qvjszkahdc": 0,
"qzoriqedoh": 0,
"rjicuyfsmt": 0,
"rqenalbuko": 40,
"rxkwogbxwu": 0,
"sbqqabqukn": 1,
"skhgbmucrp": 0,
"slewjrvgjn": 0,
"tidjarsatz": 0,
"tmbagkmlgb": 0,
"uqcdessbeu": 0,
"vacoxpronz": 0,
"vdjfydjrpa": 11,
"visited": null,
"vktxndqyhm": 0,
"vmfqvfjptu": 0,
"vofeebpgsc": 4.77,
"wewmtafjvk": 0,
"wfqdcsrltv": 0,
"wzmbxalguv": 0,
"xjllpdyool": 0,
"xjxbwynytx": 0,
"xmirihwycl": 0,
"xxrsnjlmfv": 0,
"ybgdjpexth": 0,
"ymkmkuxlrq": 0,
"yneoycjloj": 0,
"yogujlfvpb": 0,
"zahigfmqxb": 0,
"znqqhqkjte": 0,
"zyztudtziu": 3.23
},
"content_hash": "d273fb095a7c08fef69fb90ec316fcb9",
"datetime": ISODate("2014-02-19T22:58:52.805Z"),
"description_ru": "Значение показателей за этап 2",
"label_ru": "Окончание этапа 2"
},
"2": {
"_id": ObjectId("532854a3fb0a2973718b45c6"),
"content": {
"atkswlntfd": 0,
"auwbsjqzir": 0,
"avqrnjzbgd": 2,
"awquatbduv": 0,
"axdducvoxb": 2.46,
"bkoldugcrp": 4,
"bzccjslewi": 0,
"cclwyezydc": 0,
"confirmed": "1",
"covfctuuhi": 0,
"dingrnyknr": 0,
"dkfknpzsnt": 0,
"dqetuhllse": 0,
"duorlxiqbw": 5,
"eayoicezsh": 20,
"esrfffruoy": 0,
"ewdunlkxue": 1,
"ewfshjnome": 0,
"exakqiudxg": 5,
"fabdcybqxu": 1,
"fsplxunmaf": 0,
"fubxmogyam": 1,
"fuzqrnwsks": 0,
"fwtweaeeba": 52.15,
"fybnnlojgb": 5,
"gdjheqrqcx": 0,
"gpupstzwjd": 6,
"gxilphzzcu": 0,
"hbahrruokf": 0,
"hbqnleclwp": 2,
"hchpoxxnwz": 0,
"hmorfnbfvf": 0,
"hqatnzynxb": 0,
"hrqssioxdv": 0,
"hvscavwupe": 0,
"hyzlbtnxil": 0,
"idzxqjoxgd": 0,
"ikxsvguboy": 0,
"ipjpwkbqnt": 0,
"izqighabwk": 0,
"jncncbplme": 0,
"jxkspszlrc": 1,
"kekarveuhb": 0,
"klyoglzriu": 0,
"kmvuelmdwe": 0,
"knxzfjwnax": 5,
"kqfhjboecc": 0,
"kqhojbwvmo": 0,
"lchogmhynm": 0,
"lmuwyeqvph": 7,
"lvcdbhisbx": 0,
"mijczjeqeo": 8,
"mpxzquzcat": 0,
"mqqoetqued": 0,
"muktdrzphw": 0,
"nceszojuvt": 0,
"nypnjqgxop": 0,
"ojklibfieg": 1,
"padotysmxb": 0,
"pidwyruvfq": 34.62,
"pkeymzxsrj": 0,
"pnjtfvzwiv": 0,
"pqjnpoxmcx": 0,
"pyexnkjujx": 38,
"qfeqdvzssg": 0,
"qidslfqnwn": 0,
"qvjszkahdc": 0,
"qzoriqedoh": 0,
"rjicuyfsmt": 2,
"rqenalbuko": 40,
"rxkwogbxwu": 0,
"sbqqabqukn": 1,
"skhgbmucrp": 0,
"slewjrvgjn": 0,
"tidjarsatz": 0,
"tmbagkmlgb": 0,
"uploaded": null,
"uqcdessbeu": 0,
"vacoxpronz": 0,
"vdjfydjrpa": 11,
"visited": true,
"vktxndqyhm": 0,
"vmfqvfjptu": 0,
"vofeebpgsc": 4.77,
"wewmtafjvk": 0,
"wfqdcsrltv": 0,
"wzmbxalguv": 0,
"xjllpdyool": 1,
"xjxbwynytx": 0,
"xmirihwycl": 0,
"xxrsnjlmfv": 0,
"ybgdjpexth": 0,
"ymkmkuxlrq": 0,
"yneoycjloj": 2,
"yogujlfvpb": 0,
"zahigfmqxb": 0,
"znqqhqkjte": 0,
"zyztudtziu": 3.23
},
"content_hash": "64adcf5534b5b1f77282a95f0b14ef99",
"datetime": ISODate("2014-03-18T14:13:55.593Z"),
"description_ru": "Значение показателей за этап 3",
"document_id": "52b85dfa32b6249513f15897",
"document_uuid": "a0a39ace-694c-48c5-841d-8b351e5b91da",
"label_ru": "Окончание этапа 3"
}
},
"visited": true,
"vktxndqyhm": 0,
"vmfqvfjptu": 0,
"vofeebpgsc": 4.77,
"wewmtafjvk": 1,
"wfqdcsrltv": 0,
"wzmbxalguv": 0,
"xjllpdyool": 1,
"xjxbwynytx": 0,
"xmirihwycl": 0,
"xxrsnjlmfv": 0,
"ybgdjpexth": 0,
"ymkmkuxlrq": 0,
"yneoycjloj": 0,
"yogujlfvpb": 0,
"zahigfmqxb": 0,
"znqqhqkjte": 0,
"zyztudtziu": 3.23
}
Right now the earliest history entry is "versions.2.", but 2 weeks from now it will be "versions.3.", 6 weeks from now it will be "versions.4.", and so on...
**this is average of "versions.0.vofeebpgsc" values grouped by "chairtype":**
array(2) {
["result"]=>
array(3) {
[0]=>
array(2) {
["_id"]=>
string(1) "E"
["MID"]=>
float(1.3903333333333)
}
[1]=>
array(2) {
["_id"]=>
string(1) "T"
["MID"]=>
float(0.4)
}
[2]=>
array(2) {
["_id"]=>
string(1) "G"
["MID"]=>
float(0.72931034482759)
}
}
["ok"]=>
float(1)
}
/**
 * Return the average of "versions.0.content.$itemName" over all documents of
 * type "chair" that belong to the given chair type, rounded to 2 decimals.
 *
 * @param string $itemName  Name of the parameter field stored under "versions.N.content".
 * @param string $chairType Value of the "chairtype" field to average over ('E', 'T' or 'G').
 * @return float Rounded average, or 0.0 when no document matches $chairType.
 */
function getMiddle($itemName, $chairType){
    $m = new MongoClient();
    $db = $m->foo_data;
    $collection = new MongoCollection($db, 'documents');
    $thisField = '$versions.content.'.$itemName;
    $out = $collection->aggregate(
        // Select the requested chair type here instead of picking a row of the
        // result by position afterwards: $group does not guarantee the order of
        // its output, so a fixed E=0 / T=1 / G=2 index can return the wrong group.
        array('$match' => array('type' => 'chair', 'chairtype' => $chairType)),
        array('$unwind' => '$versions'),
        // Collapse each document back to one row that keeps only the first
        // unwound "versions" entry.
        array('$group' => array(
            '_id' => '$_id',
            'chairtype' => array('$first' => '$chairtype'),
            'versions' => array('$first' => '$versions')
        )),
        array('$group' => array(
            '_id' => '$chairtype',
            'MID' => array('$avg' => $thisField)
        ))
    );
    // No matching documents (or an unknown chair type) yields an empty result.
    if (empty($out['result']) || !isset($out['result'][0]['MID'])) {
        return 0.0;
    }
    return round($out['result'][0]['MID'], 2);
}
// Example call; in practice this runs inside a foreach loop with different arguments.
print getMiddle('vofeebpgsc', 'G');
I need to get the average value of each parameter across "versions.0.param_name" through "versions.11.param_name" in every document, grouped by "chairtype" (that is, the average over each parameter's history for the last year, for each type of university department). I was given this task because the average of "versions.0.param_name" alone does not reflect the statistics very well.
for example... I have 60 documents with "chairtype" = "E" and now I have '0','1' and '2' arrays of history in versions
I need to calculate:
(
"versions.0.content.fwtweaeeba" +
"versions.1.content.fwtweaeeba" +
"versions.2.content.fwtweaeeba" +
the same fields for each of 59 documents
) / (60*3) = It is good average of "fwtweaeeba" parameter for "chairtype" = "E"
9 months later I will have 60 documents with "chairtype" = "E" and I will have '0','1','2','3','4','5','6','7','8','9','10','11' arrays of history in versions
I will need to calculate:
(
"versions.0.content.fwtweaeeba" +
"versions.1.content.fwtweaeeba" +
"versions.2.content.fwtweaeeba" +
"versions.3.content.fwtweaeeba" +
"versions.4.content.fwtweaeeba" +
"versions.5.content.fwtweaeeba" +
"versions.6.content.fwtweaeeba" +
"versions.7.content.fwtweaeeba" +
"versions.8.content.fwtweaeeba" +
"versions.9.content.fwtweaeeba" +
"versions.10.content.fwtweaeeba" +
"versions.11.content.fwtweaeeba" +
the same fields for each of 59 documents
) / (60*12) = It will be good average of "fwtweaeeba" parameter for "chairtype" = "E"
and so on for each "chairtype"
Can I do it with the MongoDB aggregation framework? How can I do it? Can anyone suggest a good HOWTO about composing complex aggregation queries in MongoDB?
回答1:
The data structure here is not a good implementation, there are lots of problems with how this is structured and it is completely unsuited to aggregation. The main problems here are:
Your structure does not actually use any arrays: right now "versions" is a sub-document keyed by "0", "1", "2", ... rather than a real array.
All of the specific key names cause a real problem, and this can be avoided.
As such the only way to traverse this sort of structure is using JavaScript with mapReduce.
Defining a mapper:
// Map step: cycle through each of the "versions" entries and then through
// everything under "content". For every measured parameter, emit a composite
// key made of the document's "chairtype" and the parameter name, with a
// partial aggregate { sum, count } as the value.
var mapper = function () {
    for ( var n in this.versions ) {
        var content = this.versions[n].content;
        for ( var k in content ) {
            // Skip the bookkeeping flags. Both tests must hold, hence &&:
            // with || the condition is true for every key and filters nothing.
            if ( k == 'confirmed' || k == 'visited' ) {
                continue;
            }
            var raw = content[k];
            // Other non-metric keys (e.g. "uploaded": null) also show up in
            // "content"; nulls, booleans and strings must not enter the average.
            if ( raw === null || typeof raw === 'undefined' ||
                 typeof raw === 'boolean' || typeof raw === 'string' ) {
                continue;
            }
            var num = Number( raw );
            if ( isNaN( num ) ) {
                continue;
            }
            emit(
                { type: this.chairtype, key: k },
                { sum: num, count: 1 }
            );
        }
    }
};

// And then a reducer. It only adds up partial sums and counts, so its output
// has the same shape as the emitted values and stays correct when MongoDB
// calls reduce more than once for the same key. Returning an average here
// would turn repeated reductions into an "average of averages".
var reducer = function (key, values) {
    var total = { sum: 0, count: 0 };
    for ( var i = 0; i < values.length; i++ ) {
        total.sum += values[i].sum;
        total.count += values[i].count;
    }
    return total;
};

// Finalize step: turn the accumulated { sum, count } into the average.
// Pass it along with the other two functions, e.g.
//   db.collection.mapReduce( mapper, reducer,
//       { out: { inline: 1 }, finalize: finalizer } )
var finalizer = function (key, reduced) {
    return ( reduced.count > 0 ) ? reduced.sum / reduced.count : 0;
};
Which is just a simple way of producing an average from all the values coming in from the mapper with the same key.
So while that should work nicely, what you should be doing is changing your structure. So in fact if you had something like this:
{
"_id": ObjectId("52b85dfa32b6249513f15897"),
"parent": "47de3176-bbc3-44e0-8063-8920ac56fdc8",
"type": "chair",
"chairtype": "E",
"content": [
{ "key": "atkswlntfd", "value": 0, "version": 0 },
{ "key": "auwbsjqzir", "value": 0, "version": 0 },
{ "key": "avqrnjzbgd", "value": 0, "version": 0 }
]
}
Or generally more or less in that form, the aggregation operation becomes very simple:
// Unwind the "content" array so each { key, value, version } entry becomes its
// own document, then average "value" per (chairtype, key) pair.
db.collection.aggregate([
    { $unwind: "$content" },
    {
        $group: {
            _id: { chairtype: "$chairtype", key: "$content.key" },
            average: { $avg: "$content.value" }
        }
    }
])
Or whatever other variation of this is required, but now it is made possible by changing the structure.
So without the document being structured differently you will need to use mapReduce to do this.
来源:https://stackoverflow.com/questions/23063840/aggregate-mongo-data-php