How to do query auto-completion/suggestions in Lucene?

后端 未结 5 1891
刺人心
刺人心 2020-11-27 09:35

I'm looking for a way to do query auto-completion/suggestions in Lucene. I've Googled around a bit and played around a bit, but all of the examples I've seen seem to be s…

5条回答
  •  天命终不由人
    2020-11-27 10:09

    Here's a transliteration of Mat's implementation into C# for Lucene.NET, along with a snippet for wiring a text box using jQuery's autocomplete feature.

    
    

    ... jQuery Autocomplete:

    // Don't navigate away from the field when pressing tab on a selected item.
    $( "#search-input" ).keydown(function (event) {
        if (event.keyCode !== $.ui.keyCode.TAB) {
            return;
        }

        // jQuery UI 1.9 renamed the widget's data key from "autocomplete" to
        // "ui-autocomplete" and 1.10 dropped the old key, so look up both and
        // tolerate the widget not being attached instead of throwing on `.menu`.
        var widget = $(this).data("ui-autocomplete") || $(this).data("autocomplete");
        if (widget && widget.menu.active) {
            event.preventDefault();
        }
    });
    
    // Wire the search box to the server-side suggester. Only the last
    // whitespace-separated word of the input is completed.
    $( "#search-input" ).autocomplete({
        source: '@Url.Action("SuggestTerms")', // <-- ASP.NET MVC Razor syntax
        minLength: 2,
        delay: 500,
        // Returning false keeps the focused suggestion from being written into the input.
        focus: function () {
            return false;
        },
        // Swap the partially typed last word for the chosen suggestion.
        select: function (event, ui) {
            var typedWords = this.value.split(/\s+/);
            var completedWord = ui.item.value.trim();
            // Everything except the last (partial) word, followed by the completion.
            this.value = typedWords.slice(0, -1).concat(completedWord).join(" ");
            return false;
        }
    });
    

    ... here's the ASP.NET MVC Controller code:

        //
        // GET: /MyApp/SuggestTerms?term=something
        /// <summary>
        /// Returns auto-complete suggestions, as a JSON array of strings, for the last
        /// whitespace-separated word of <paramref name="term"/>.
        /// </summary>
        /// <param name="term">The text currently typed into the search box; may be null or blank.</param>
        /// <returns>A JSON array of suggested terms; empty when <paramref name="term"/> is null or blank.</returns>
        public JsonResult SuggestTerms(string term)
        {
            // Every return path must opt in to GET: without AllowGet, MVC refuses to
            // serialize JSON for a GET request and throws InvalidOperationException.
            if (string.IsNullOrWhiteSpace(term))
            {
                return Json(new string[] { }, JsonRequestBehavior.AllowGet);
            }

            // Only the word currently being typed is completed. A trailing space yields
            // an empty last word on purpose: the user has started a new word.
            term = term.Split().Last();

            // Fetch suggestions
            string[] suggestions = SearchSvc.SuggestTermsFor(term).ToArray();

            return Json(suggestions, JsonRequestBehavior.AllowGet);
        }
    

    ... and here's Mat's code in C#:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using Lucene.Net.Store;
    using Lucene.Net.Index;
    using Lucene.Net.Search;
    using SpellChecker.Net.Search.Spell;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Analysis.NGram;
    using Lucene.Net.Documents;
    
    namespace Cipher.Services
    {
        /// <summary>
        /// Search term auto-completer, works for single terms (so use on the last term of the query).
        /// Returns more popular terms first.
        /// </summary>
        /// <remarks>
        /// Author: Mat Mannion, M.Mannion@warwick.ac.uk
        /// </remarks>
        public class SearchAutoComplete
        {
            /// <summary>Maximum number of suggestions returned by <see cref="SuggestTermsFor"/>.</summary>
            public int MaxResults { get; set; }

            /// <summary>
            /// Analyzer used when writing the auto-complete index: standard tokenizing, lower-casing,
            /// ASCII folding and English stop-word removal, followed by edge n-grams of 1 to 20 characters.
            /// </summary>
            private class AutoCompleteAnalyzer : Analyzer
            {
                public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
                {
                    TokenStream result = new StandardTokenizer(kLuceneVersion, reader);

                    result = new StandardFilter(result);
                    result = new LowerCaseFilter(result);
                    result = new ASCIIFoldingFilter(result);
                    result = new StopFilter(false, result, StopFilter.MakeStopSet(kEnglishStopWords));
                    result = new EdgeNGramTokenFilter(
                        result, Lucene.Net.Analysis.NGram.EdgeNGramTokenFilter.DEFAULT_SIDE, 1, 20);

                    return result;
                }
            }

            private static readonly Lucene.Net.Util.Version kLuceneVersion = Lucene.Net.Util.Version.LUCENE_29;

            // Field holding the edge n-grams of each word; this is what queries match against.
            private static readonly String kGrammedWordsField = "words";

            // Field holding the original, un-grammed word; this is what gets returned as a suggestion.
            private static readonly String kSourceWordField = "sourceWord";

            // Field holding the number of source documents containing the word; used for sorting.
            private static readonly String kCountField = "count";

            private static readonly String[] kEnglishStopWords = {
                "a", "an", "and", "are", "as", "at", "be", "but", "by",
                "for", "i", "if", "in", "into", "is",
                "no", "not", "of", "on", "or", "s", "such",
                "t", "that", "the", "their", "then", "there", "these",
                "they", "this", "to", "was", "will", "with"
            };

            private readonly Directory m_directory;

            private IndexReader m_reader;

            private IndexSearcher m_searcher;

            /// <summary>Creates an auto-completer over the index stored in the given file-system directory.</summary>
            /// <param name="autoCompleteDir">Path of the directory holding the auto-complete index.</param>
            public SearchAutoComplete(string autoCompleteDir) :
                this(FSDirectory.Open(new System.IO.DirectoryInfo(autoCompleteDir)))
            {
            }

            /// <summary>Creates an auto-completer over the given index directory.</summary>
            /// <param name="autoCompleteDir">Directory holding (or that will hold) the auto-complete index.</param>
            /// <param name="maxResults">Initial value of <see cref="MaxResults"/>.</param>
            public SearchAutoComplete(Directory autoCompleteDir, int maxResults = 8)
            {
                this.m_directory = autoCompleteDir;
                MaxResults = maxResults;

                ReplaceSearcher();
            }

            /// <summary>
            /// Find terms matching the given partial word that appear in the highest number of documents.
            /// </summary>
            /// <param name="term">A word or part of a word</param>
            /// <returns>A list of suggested completions</returns>
            public IEnumerable<string> SuggestTermsFor(string term)
            {
                // No index yet, or nothing to complete: return no suggestions rather than
                // dereferencing a null term.
                if (m_searcher == null || string.IsNullOrEmpty(term))
                    return new string[] { };

                // get the top terms for query; lower-cased to mirror the LowerCaseFilter
                // applied by AutoCompleteAnalyzer when the index was written
                Query query = new TermQuery(new Term(kGrammedWordsField, term.ToLower()));

                // reverse = true: highest document count first, as the class summary promises
                Sort sort = new Sort(new SortField(kCountField, SortField.INT, true));

                TopDocs docs = m_searcher.Search(query, null, MaxResults, sort);
                string[] suggestions = docs.ScoreDocs
                    .Select(doc => m_reader.Document(doc.Doc).Get(kSourceWordField))
                    .ToArray();

                return suggestions;
            }

            /// <summary>
            /// Open the index in the given directory and create a new index of word frequency for the
            /// given index.
            /// </summary>
            /// <param name="sourceDirectory">Directory containing the index to count words in.</param>
            /// <param name="fieldToAutocomplete">The field in the index that should be analyzed.</param>
            public void BuildAutoCompleteIndex(Directory sourceDirectory, String fieldToAutocomplete)
            {
                // build a dictionary (from the spell package)
                using (IndexReader sourceReader = IndexReader.Open(sourceDirectory, true))
                {
                    LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

                    // code from
                    // org.apache.lucene.search.spell.SpellChecker.indexDictionary(
                    // Dictionary)
                    //IndexWriter.Unlock(m_directory);

                    // use a custom analyzer so we can do EdgeNGramFiltering
                    var analyzer = new AutoCompleteAnalyzer();
                    using (var writer = new IndexWriter(m_directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
                    {
                        writer.MergeFactor = 300;
                        writer.SetMaxBufferedDocs(150);

                        // go through every word, storing the original word (incl. n-grams)
                        // and the number of times it occurs
                        foreach (string word in dict)
                        {
                            if (word.Length < 3)
                                continue; // too short we bail but "too long" is fine...

                            // ok index the word
                            // use the number of documents this word appears in
                            int freq = sourceReader.DocFreq(new Term(fieldToAutocomplete, word));
                            var doc = MakeDocument(fieldToAutocomplete, word, freq);

                            writer.AddDocument(doc);
                        }

                        writer.Optimize();
                    }
                }

                // re-open our reader
                ReplaceSearcher();
            }

            /// <summary>
            /// Builds the index document for one word: the original word, its n-grammed form and its count.
            /// </summary>
            /// <param name="fieldToAutocomplete">Name of the source field; not used when building the document.</param>
            /// <param name="word">The word to index.</param>
            /// <param name="frequency">Number of source documents containing <paramref name="word"/>.</param>
            private static Document MakeDocument(String fieldToAutocomplete, string word, int frequency)
            {
                var doc = new Document();
                doc.Add(new Field(kSourceWordField, word, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term
                doc.Add(new Field(kGrammedWordsField, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed

                // The count is read back numerically for sorting, so write it culture-independently.
                string count = frequency.ToString(System.Globalization.CultureInfo.InvariantCulture);
                doc.Add(new Field(kCountField, count, Field.Store.NO, Field.Index.NOT_ANALYZED)); // count
                return doc;
            }

            /// <summary>
            /// Points the reader and searcher at the current contents of the auto-complete directory,
            /// or clears the searcher when no index exists there yet.
            /// </summary>
            private void ReplaceSearcher()
            {
                if (!IndexReader.IndexExists(m_directory))
                {
                    m_searcher = null;
                    return;
                }

                if (m_reader == null)
                {
                    m_reader = IndexReader.Open(m_directory, true);
                }
                else
                {
                    // Reopen() does not refresh the instance in place: it returns a reader over
                    // the current index (the same instance when nothing changed). Keep the
                    // returned reader, otherwise a rebuilt index is never seen.
                    IndexReader refreshed = m_reader.Reopen();
                    if (refreshed != m_reader)
                    {
                        // NOTE(review): assumes no search is still running against the old
                        // reader when it is disposed — confirm for concurrent callers.
                        m_reader.Dispose();
                        m_reader = refreshed;
                    }
                }

                m_searcher = new IndexSearcher(m_reader);
            }
        }
    }

提交回复
热议问题