Multi-field, multi-word, match without query_string

后端 未结 4 1570
清酒与你
清酒与你 2020-12-23 21:58

I would like to be able to match a multi-word search against multiple fields, where every searched word is contained in at least one of the fields, in any combination.

4条回答
  •  -上瘾入骨i
    2020-12-23 22:48

    What you are looking for is the multi_match query, but it doesn't behave in quite the way you would like.

    Compare the output of validate for multi_match vs query_string.

    multi_match (with operator and) will make sure that ALL terms exist together in at least one single field:

    curl -XGET 'http://127.0.0.1:9200/_validate/query?pretty=1&explain=true'  -d '
    {
       "multi_match" : {
          "operator" : "and",
          "fields" : [
             "firstname",
             "lastname"
          ],
          "query" : "john smith"
       }
    }
    '
    
    # {
    #    "_shards" : {
    #       "failed" : 0,
    #       "successful" : 1,
    #       "total" : 1
    #    },
    #    "explanations" : [
    #       {
    #          "index" : "test",
    #          "explanation" : "((+lastname:john +lastname:smith) | (+firstname:john +firstname:smith))",
    #          "valid" : true
    #       }
    #    ],
    #    "valid" : true
    # }
    

    Whereas query_string (with default_operator AND) will check that EACH term exists in at least one field, though not necessarily the same field for every term:

    curl -XGET 'http://127.0.0.1:9200/_validate/query?pretty=1&explain=true'  -d '
    {
       "query_string" : {
          "fields" : [
             "firstname",
             "lastname"
          ],
          "query" : "john smith",
          "default_operator" : "AND"
       }
    }
    '
    
    # {
    #    "_shards" : {
    #       "failed" : 0,
    #       "successful" : 1,
    #       "total" : 1
    #    },
    #    "explanations" : [
    #       {
    #          "index" : "test",
    #          "explanation" : "+(firstname:john | lastname:john) +(firstname:smith | lastname:smith)",
    #          "valid" : true
    #       }
    #    ],
    #    "valid" : true
    # }
    

    So you have a few choices to achieve what you are after:

    1. Preparse the search terms, to remove things like wildcards, etc, before using the query_string

    2. Preparse the search terms to extract each word, then generate a multi_match query per word

    3. Use index_name in your mapping for the name fields to index their data into a single field, which you can then use for search (like your own custom _all field).

    As follows:

    curl -XPUT 'http://127.0.0.1:9200/test/?pretty=1'  -d '
    {
       "mappings" : {
          "test" : {
             "properties" : {
                "firstname" : {
                   "index_name" : "name",
                   "type" : "string"
                },
                "lastname" : {
                   "index_name" : "name",
                   "type" : "string"
                }
             }
          }
       }
    }
    '
    
    curl -XPOST 'http://127.0.0.1:9200/test/test?pretty=1'  -d '
    {
       "firstname" : "john",
       "lastname" : "smith"
    }
    '
    
    curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1'  -d '
    {
       "query" : {
          "match" : {
             "name" : {
                "operator" : "and",
                "query" : "john smith"
             }
          }
       }
    }
    '
    
    # {
    #    "hits" : {
    #       "hits" : [
    #          {
    #             "_source" : {
    #                "firstname" : "john",
    #                "lastname" : "smith"
    #             },
    #             "_score" : 0.2712221,
    #             "_index" : "test",
    #             "_id" : "VJFU_RWbRNaeHF9wNM8fRA",
    #             "_type" : "test"
    #          }
    #       ],
    #       "max_score" : 0.2712221,
    #       "total" : 1
    #    },
    #    "timed_out" : false,
    #    "_shards" : {
    #       "failed" : 0,
    #       "successful" : 5,
    #       "total" : 5
    #    },
    #    "took" : 33
    # }
    

    Note however, that firstname and lastname are no longer searchable independently. The data for both fields has been indexed into name.

    You could use multi-fields with the path parameter to make them searchable both independently and together, as follows:

    curl -XPUT 'http://127.0.0.1:9200/test/?pretty=1'  -d '
    {
       "mappings" : {
          "test" : {
             "properties" : {
                "firstname" : {
                   "fields" : {
                      "firstname" : {
                         "type" : "string"
                      },
                      "any_name" : {
                         "type" : "string"
                      }
                   },
                   "path" : "just_name",
                   "type" : "multi_field"
                },
                "lastname" : {
                   "fields" : {
                      "any_name" : {
                         "type" : "string"
                      },
                      "lastname" : {
                         "type" : "string"
                      }
                   },
                   "path" : "just_name",
                   "type" : "multi_field"
                }
             }
          }
       }
    }
    '
    
    curl -XPOST 'http://127.0.0.1:9200/test/test?pretty=1'  -d '
    {
       "firstname" : "john",
       "lastname" : "smith"
    }
    '
    

    Searching the any_name field works:

    curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1'  -d '
    {
       "query" : {
          "match" : {
             "any_name" : {
                "operator" : "and",
                "query" : "john smith"
             }
          }
       }
    }
    '
    
    # {
    #    "hits" : {
    #       "hits" : [
    #          {
    #             "_source" : {
    #                "firstname" : "john",
    #                "lastname" : "smith"
    #             },
    #             "_score" : 0.2712221,
    #             "_index" : "test",
    #             "_id" : "Xf9qqKt0TpCuyLWioNh-iQ",
    #             "_type" : "test"
    #          }
    #       ],
    #       "max_score" : 0.2712221,
    #       "total" : 1
    #    },
    #    "timed_out" : false,
    #    "_shards" : {
    #       "failed" : 0,
    #       "successful" : 5,
    #       "total" : 5
    #    },
    #    "took" : 11
    # }
    

    Searching firstname for john AND smith returns no hits, as expected, because firstname contains only john:

    curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1'  -d '
    {
       "query" : {
          "match" : {
             "firstname" : {
                "operator" : "and",
                "query" : "john smith"
             }
          }
       }
    }
    '
    
    # {
    #    "hits" : {
    #       "hits" : [],
    #       "max_score" : null,
    #       "total" : 0
    #    },
    #    "timed_out" : false,
    #    "_shards" : {
    #       "failed" : 0,
    #       "successful" : 5,
    #       "total" : 5
    #    },
    #    "took" : 2
    # }
    

    But searching firstname for just john works correctly:

    curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1'  -d '
    {
       "query" : {
          "match" : {
             "firstname" : {
                "operator" : "and",
                "query" : "john"
             }
          }
       }
    }
    '
    
    # {
    #    "hits" : {
    #       "hits" : [
    #          {
    #             "_source" : {
    #                "firstname" : "john",
    #                "lastname" : "smith"
    #             },
    #             "_score" : 0.30685282,
    #             "_index" : "test",
    #             "_id" : "Xf9qqKt0TpCuyLWioNh-iQ",
    #             "_type" : "test"
    #          }
    #       ],
    #       "max_score" : 0.30685282,
    #       "total" : 1
    #    },
    #    "timed_out" : false,
    #    "_shards" : {
    #       "failed" : 0,
    #       "successful" : 5,
    #       "total" : 5
    #    },
    #    "took" : 3
    # }
    

提交回复
热议问题