I searched for this question but couldn't find any useful answer. I want to get the total count for each word in a document. For example, I have some tweets in my indices a
You're looking for term vectors, which leverage analyzers. Because they do, you can define any analyzer you need, e.g. a stemming analyzer that reduces words to their root/normal form. Take a look at the documentation for further details.
In:
POST so/_close
PUT so/_settings
{
"settings": {
"analysis":{
"analyzer": {
"my_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "my_stemmer"]
}
},
"filter": {
"my_stemmer": {
"type": "stemmer",
"name": "english"
}
}
}
}
}
POST so/_open
PUT so/t1/_mapping
{
"t1": {
"properties": {
"tweet": {
"type": "string",
"store": true,
"index_analyzer": "my_analyzer"
}
}
}
}
POST so/t1/1
{"tweet": "It is so boring here I want to go to my home sweet home. So I'm bored"}
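Out (the response to a term vectors request for the indexed document, e.g. `GET so/t1/1/_termvector` — the exact endpoint name depends on the Elasticsearch version; per-term details are elided as `...`):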
{
"_index": "so",
"_type": "t1",
"_id": "1",
"_version": 2,
"found": true,
"term_vectors": {
"tweet": {
"field_statistics": {
"sum_doc_freq": 13,
"doc_count": 1,
"sum_ttf": 17
},
"terms": {
"bore": {
"term_freq": 2,
...
},
"go": {
"term_freq": 1,
...
},
"here": {
"term_freq": 1,
...
},
"home": {
"term_freq": 2,
...
},
"i": {
"term_freq": 1,
...
},
"i'm": {
"term_freq": 1,
...
},
"is": {
"term_freq": 1,
...
},
"it": {
"term_freq": 1,
...
},
"my": {
"term_freq": 1,
...
},
"so": {
"term_freq": 2,
...
},
"sweet": {
"term_freq": 1,
...
},
"to": {
"term_freq": 2,
...
},
"want": {
"term_freq": 1,
...
}
}
}
}
}