问题
I have 100 million records in my "sample" collection. I want to have another collection with all of the distinct user names ("user.screen_name").
I have the following structure in my mongodb database "sample" collection:
{
"_id" : ObjectId("515af34297c2f607b822a54b"),
"text" : "random text goes here",
"user" :
{
"id" : 972863366,
"screen_name" : "xname",
"verified" : false,
"time_zone" : "Amsterdam",
}
}
When I try things like "distinct('user.id').length" I get the following error:
"errmsg" : "exception: distinct too big, 16mb cap",
I need an efficient way to build another collection containing only {"user_name": "name"} for each distinct user in my "sample" collection, so that I can then query the size of this new collection to get the number of distinct users (and use it for further analysis in the future).
回答1:
I tried the solution I found here and it worked fine :) .. I'll keep the thread and add my code in case someone needs it.
// Input collection holding the raw records.
var SOURCE = db.getCollection('sample');
// Output collection that will receive one document per distinct key.
var DEST = db.getCollection('distinct');
// Clear any results left over from a previous run.
DEST.drop();
/**
 * Map step of the distinct-user job.
 *
 * Invoked with `this` bound to the document being processed. Emits one
 * `{count: 1}` value keyed by the document's `user.screen_name`.
 *
 * Documents without a `user` sub-document, or without a screen name, are
 * skipped: dereferencing a missing `user` would throw and abort the whole
 * job, and a null key would be counted as a bogus "distinct user".
 */
var map = function() {
    var user = this.user;
    if (!user || user.screen_name === undefined || user.screen_name === null) {
        return;
    }
    emit(user.screen_name, {count: 1});
};
/**
 * Reduce step of the distinct-user job: sums the `count` fields of all
 * values emitted for one key.
 *
 * The returned object has the same shape as the emitted values
 * (`{count: <number>}`), so a previous result can safely be fed back in as
 * one of the `values` when the same key is reduced more than once.
 *
 * @param {*} key - The key the values were emitted under (unused here).
 * @param {Array<{count: number}>} values - Values to combine.
 * @returns {{count: number}} Total count across `values`.
 */
var reduce = function(key, values) {
    var total = 0;
    for (var i = 0; i < values.length; i++) {
        total += values[i].count;
    }
    return {count: total};
};
// Run the job over SOURCE, writing one document per distinct key into the
// 'distinct' collection (the same collection DEST refers to).
var res = SOURCE.mapReduce(map, reduce, {
    out: 'distinct',
    verbose: true
});

// The job statistics are optional: newer servers no longer return `counts`,
// so only print this figure when it is actually present.
if (res && res.counts) {
    print( "distinct count= " + res.counts.output );
}
// Counting the output collection works regardless of what the job returned.
print( "distinct count=", DEST.count() );
Regards
来源:https://stackoverflow.com/questions/15878011/distinct-values-of-a-key-in-a-sub-document-mongodb-100-million-records