首先要讲下,为什么需要使用filter过滤
过滤不会计算匹配度得分(score),因此它比查询要快很多
过滤查询后的结果能被缓存到内存中,并被多次重复使用.
1.如果我们要查询出account中balance从20000到30000之间的数据
curl -XPOST localhost:9200/bank/_search?pretty -d '{
"query":{
"filtered":{
"query":{
"match_all":{}},
"filter":{
"range":{
"balance":{
"gte":20000,
"lte":30000
}
}
}
}
}
}'
{
"took" : 102,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 217,
"max_score" : 1.0,
"hits" : [ {
"_index" : "bank",
"_type" : "account",
"_id" : "49",
"_score" : 1.0,
"_source":{"account_number":49,"balance":29104,"firstname":"Fulton","lastname":"Holt","age":23,"gender":"F","address":"451 Humboldt Street","employer":"Anocha","email":"fultonholt@anocha.com","city":"Sunriver","state":"RI"}
}, {
"_index" : "bank",
"_type" : "account",
"_id" : "102",
"_score" : 1.0,
"_source":{"account_number":102,"balance":29712,"firstname":"Dena","lastname":"Olson","age":27,"gender":"F","address":"759 Newkirk Avenue","employer":"Hinway","email":"denaolson@hinway.com","city":"Choctaw","state":"NJ"}
}, {
"_index" : "bank",
"_type" : "account",
"_id" : "133",
"_score" : 1.0,
"_source":{"account_number":133,"balance":26135,"firstname":"Deena","lastname":"Richmond","age":36,"gender":"F","address":"646 Underhill Avenue","employer":"Sunclipse","email":"deenarichmond@sunclipse.com","city":"Austinburg","state":"SC"}
}, {
"_index" : "bank",
"_type" : "account",
"_id" : "140",
"_score" : 1.0,
"_source":{"account_number":140,"balance":26696,"firstname":"Cotton","lastname":"Christensen","age":32,"gender":"M","address":"878 Schermerhorn Street","employer":"Prowaste","email":"cottonchristensen@prowaste.com","city":"Mayfair","state":"LA"}
}, {
"_index" : "bank",
"_type" : "account",
"_id" : "203",
"_score" : 1.0,
"_source":{"account_number":203,"balance":21890,"firstname":"Eve","lastname":"Wyatt","age":33,"gender":"M","address":"435 Furman Street","employer":"Assitia","email":"evewyatt@assitia.com","city":"Jamestown","state":"MN"}
}, {
"_index" : "bank",
"_type" : "account",
"_id" : "239",
"_score" : 1.0,
"_source":{"account_number":239,"balance":25719,"firstname":"Chang","lastname":"Boyer","age":36,"gender":"M","address":"895 Brigham Street","employer":"Qaboos","email":"changboyer@qaboos.com","city":"Belgreen","state":"NH"}
}, {
"_index" : "bank",
"_type" : "account",
"_id" : "241",
"_score" : 1.0,
"_source":{"account_number":241,"balance":25379,"firstname":"Schroeder","lastname":"Harrington","age":26,"gender":"M","address":"610 Tapscott Avenue","employer":"Otherway","email":"schroederharrington@otherway.com","city":"Ebro","state":"TX"}
}, {
"_index" : "bank",
"_type" : "account",
"_id" : "246",
"_score" : 1.0,
"_source":{"account_number":246,"balance":28405,"firstname":"Katheryn","lastname":"Foster","age":21,"gender":"F","address":"259 Kane Street","employer":"Quantalia","email":"katherynfoster@quantalia.com","city":"Bath","state":"TX"}
}, {
"_index" : "bank",
"_type" : "account",
"_id" : "253",
"_score" : 1.0,
"_source":{"account_number":253,"balance":20240,"firstname":"Melissa","lastname":"Gould","age":31,"gender":"M","address":"440 Fuller Place","employer":"Buzzopia","email":"melissagould@buzzopia.com","city":"Lumberton","state":"MD"}
}, {
"_index" : "bank",
"_type" : "account",
"_id" : "277",
"_score" : 1.0,
"_source":{"account_number":277,"balance":29564,"firstname":"Romero","lastname":"Lott","age":31,"gender":"M","address":"456 Danforth Street","employer":"Plasto","email":"romerolott@plasto.com","city":"Vincent","state":"VT"}
} ]
}
}
可以看到查询的结果都是在指定范围内
2.当然elasticsearch能够像sql一样使用聚合函数
curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
"size": 0,
"aggs": {
"group_by_state": {
"terms": {
"field": "state"
}
}
}
}'
这个例子其实就是根据state字段进行分组.相当于下列sql语句
SELECT state, COUNT(*) FROM bank GROUP BY state ORDER BY COUNT(*) DESC
结果为:
{
"took" : 190,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1000,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"group_by_state" : {
"buckets" : [ {
"key" : "al",
"doc_count" : 21
}, {
"key" : "tx",
"doc_count" : 17
}, {
"key" : "id",
"doc_count" : 15
}, {
"key" : "ma",
"doc_count" : 15
}, {
"key" : "md",
"doc_count" : 15
}, {
"key" : "pa",
"doc_count" : 15
}, {
"key" : "dc",
"doc_count" : 14
}, {
"key" : "me",
"doc_count" : 14
}, {
"key" : "mo",
"doc_count" : 14
}, {
"key" : "nd",
"doc_count" : 14
} ]
}
}
}
其中key就是分组中的state值,doc_count就是该分组中的文档个数.group_by_state只是这个聚合的自定义名称(别名)
我们再使用gender来进行分组
curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
"size": 0,
"aggs": {
"group_by_gender": {
"terms": {
"field": "gender"
}
}
}
}'
{
"took" : 30,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1000,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"group_by_gender" : {
"buckets" : [ {
"key" : "m",
"doc_count" : 507
}, {
"key" : "f",
"doc_count" : 493
} ]
}
}
}
可以发现m的有507个,f的有493个.可以看到这两次查询都设置了size为0,因为我们不想显示匹配的一条条数据,只想看聚合的结果.如果去掉size=0,那么hits节点下的hits中会存在数据.
如果我不仅要查询state的分组信息,还要查询出各个分组中balance的平均数
curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
"size": 0,
"aggs": {
"group_by_state": {
"terms": {
"field": "state"
},
"aggs": {
"average_balance": {
"avg": {
"field": "balance"
}
}
}
}
}
}'
{
"took" : 34,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1000,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"group_by_state" : {
"buckets" : [ {
"key" : "al",
"doc_count" : 21,
"average_balance" : {
"value" : 25377.571428571428
}
}, {
"key" : "tx",
"doc_count" : 17,
"average_balance" : {
"value" : 22466.058823529413
}
}, {
"key" : "id",
"doc_count" : 15,
"average_balance" : {
"value" : 23614.933333333334
}
}, {
"key" : "ma",
"doc_count" : 15,
"average_balance" : {
"value" : 29064.666666666668
}
}, {
"key" : "md",
"doc_count" : 15,
"average_balance" : {
"value" : 20143.733333333334
}
}, {
"key" : "pa",
"doc_count" : 15,
"average_balance" : {
"value" : 25320.933333333334
}
}, {
"key" : "dc",
"doc_count" : 14,
"average_balance" : {
"value" : 24543.64285714286
}
}, {
"key" : "me",
"doc_count" : 14,
"average_balance" : {
"value" : 20061.14285714286
}
}, {
"key" : "mo",
"doc_count" : 14,
"average_balance" : {
"value" : 25414.64285714286
}
}, {
"key" : "nd",
"doc_count" : 14,
"average_balance" : {
"value" : 31717.571428571428
}
} ]
}
}
}
注意average_balance是别名
如果我需要根据查出的balance平均数进行一个排序呢?
curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
"size": 0,
"aggs": {
"group_by_state": {
"terms": {
"field": "state",
"order": {
"average_balance": "desc"
}
},
"aggs": {
"average_balance": {
"avg": {
"field": "balance"
}
}
}
}
}
}'
3.我需要将20-29,30-39,40-49这三个年龄段的账户信息进行分组,再在每个年龄段内按gender分组,并计算各组balance的平均数
curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
{
"size": 0,
"aggs": {
"group_by_age": {
"range": {
"field": "age",
"ranges": [
{
"from": 20,
"to": 30
},
{
"from": 30,
"to": 40
},
{
"from": 40,
"to": 50
}
]
},
"aggs": {
"group_by_gender": {
"terms": {
"field": "gender"
},
"aggs": {
"average_balance": {
"avg": {
"field": "balance"
}
}
}
}
}
}
}
}'
{
"took" : 21,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1000,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"group_by_age" : {
"buckets" : [ {
"key" : "20.0-30.0",
"from" : 20.0,
"from_as_string" : "20.0",
"to" : 30.0,
"to_as_string" : "30.0",
"doc_count" : 451,
"group_by_gender" : {
"buckets" : [ {
"key" : "m",
"doc_count" : 232,
"average_balance" : {
"value" : 27374.05172413793
}
}, {
"key" : "f",
"doc_count" : 219,
"average_balance" : {
"value" : 25341.260273972603
}
} ]
}
}, {
"key" : "30.0-40.0",
"from" : 30.0,
"from_as_string" : "30.0",
"to" : 40.0,
"to_as_string" : "40.0",
"doc_count" : 504,
"group_by_gender" : {
"buckets" : [ {
"key" : "f",
"doc_count" : 253,
"average_balance" : {
"value" : 25670.869565217392
}
}, {
"key" : "m",
"doc_count" : 251,
"average_balance" : {
"value" : 24288.239043824702
}
} ]
}
}, {
"key" : "40.0-50.0",
"from" : 40.0,
"from_as_string" : "40.0",
"to" : 50.0,
"to_as_string" : "50.0",
"doc_count" : 45,
"group_by_gender" : {
"buckets" : [ {
"key" : "m",
"doc_count" : 24,
"average_balance" : {
"value" : 26474.958333333332
}
}, {
"key" : "f",
"doc_count" : 21,
"average_balance" : {
"value" : 27992.571428571428
}
} ]
}
} ]
}
}
}
可以看到结果如上.
来源:oschina
链接:https://my.oschina.net/u/866412/blog/363252