Posted on 26/05/2021 11:42:54
							
							
						 
						They do not want computer logic.
They want a list of inputs and suggestions they can apply themselves, because it takes a human brain with a human decision process to give them what they find logical.
Below is the code in question - enjoy :-). The project is attached, so you can also add your own synonyms.
BR Nicolai
 
/// <summary>
    /// Produces search suggestions for a single field of a Lucene index.
    /// Suggestions are built from terms in the index that start with the typed word,
    /// topped up with spell-checker ("did you mean") candidates, and - for multi-word
    /// input - from terms of documents matching the words typed so far.
    /// </summary>
    public class LuceneSpellChecker
    {
        private readonly SpellChecker.Net.Search.Spell.SpellChecker checker;
        private readonly IndexReader indexReader;
        private readonly string indexField;
        private readonly int numberOfSuggestions;
        // Set once the spell-check dictionary has been built from the index (see EnsureIndexed).
        private bool isIndexed;

        /// <summary>
        /// Constructs a new spell checker instance.
        /// </summary>
        /// <param name="reader">Reader over the index that terms and suggestions are taken from.</param>
        /// <param name="field">Name of the index field to look for suggestions in.</param>
        public LuceneSpellChecker(IndexReader reader, string field)
        {
            indexReader = reader;
            indexField = field;
            // Alternative: pass "new JaroWinklerDistance()" as second argument to change the string distance.
            checker = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory());
            numberOfSuggestions = Configuration.SystemConfiguration.Instance.GetInt32("/GlobalSettings/System/Repository/LuceneSpellChecker/NumberOfSuggestions");
            // Fall back to 10 suggestions when the setting is missing or not positive.
            if (numberOfSuggestions <= 0)
            {
                numberOfSuggestions = 10;
            }
        }

        /// <summary>
        /// Builds the spell-check dictionary from the index field on first use.
        /// </summary>
        private void EnsureIndexed()
        {
            if (isIndexed)
            {
                return;
            }

            checker.IndexDictionary(new LuceneDictionary(indexReader, indexField));
            isIndexed = true;
        }

        /// <summary>
        /// Suggest similar words, treating the field as not analyzed.
        /// </summary>
        /// <param name="searchString">Word(s) to find alternative suggestions for</param>
        /// <returns>Up to the configured number of suggestions; empty when nothing was found.</returns>
        public IEnumerable<string> SuggestSimilar(string searchString)
        {
            return SuggestSimilar(searchString, false);
        }

        /// <summary>
        /// Suggest similar words.
        /// </summary>
        /// <param name="searchString">Word(s) to find alternative suggestions for</param>
        /// <param name="analyzed">If the field that is looked at for suggestions is analyzed; when true the search string is lower-cased first</param>
        /// <returns>Up to the configured number of suggestions; empty when the input is null/blank or nothing was found.</returns>
        public IEnumerable<string> SuggestSimilar(string searchString, bool analyzed)
        {
            // Null input used to throw further down; blank input never produced any tokens anyway.
            if (string.IsNullOrWhiteSpace(searchString))
            {
                return Enumerable.Empty<string>();
            }

            EnsureIndexed();
            if (analyzed)
            {
                searchString = searchString.ToLowerInvariant();
            }

            // Split the input into single words using the standard analyzer.
            List<string> searchTerms = new List<string>();
            Analyzer analyzer = new mylucene.Analysis.Standard.StandardAnalyzer(mylucene.Util.Version.LUCENE_30);
            using (var tokenStream = analyzer.TokenStream("inputquery", new System.IO.StringReader(searchString)))
            {
                tokenStream.Reset();
                while (tokenStream.IncrementToken())
                {
                    var termAttr = tokenStream.GetAttribute<ITermAttribute>();
                    searchTerms.Add(termAttr.Term);
                }
            }

            int depth = 1; // 1-based position of the word currently being processed
            bool foundSuggestions = false;
            string combinedWordTermMatch = string.Empty;
            // Our final result - slot i holds the i'th suggestion, built up word by word.
            SortedDictionary<int, string> suggestionsCombined = new SortedDictionary<int, string>();
            foreach (string searchTerm in searchTerms) //Iterate the single words
            {
                int suggestionsToRequest = numberOfSuggestions;
                if (searchTerms.Count > depth)
                {
                    suggestionsToRequest = 1;
                }

                List<string> singlewordSuggestions;
                if (depth > 1 && depth == searchTerms.Count)
                {
                    // Last word of a multi-word search: look for terms in documents that match
                    // the suggestion built so far, and fall back to plain term suggestions.
                    singlewordSuggestions = GetTermsSuggestionsFromSearch(suggestionsCombined[0], searchTerm);
                    if (singlewordSuggestions.Count == 0)
                    {
                        string ignoredCombinedMatch;
                        singlewordSuggestions = GetTermSuggestions(searchTerm, suggestionsToRequest, string.Empty, out ignoredCombinedMatch);
                    }
                }
                else
                {
                    string combinedWord = string.Empty;
                    string tempCombinedWordTermMatch;
                    if (searchTerms.Count > 1)
                    {
                        // Also probe for an index term that starts with the first two words glued together.
                        combinedWord = searchTerms[0] + searchTerms[1];
                        // Fixed at 10 here, regardless of the configured number of suggestions.
                        suggestionsToRequest = 10;
                    }

                    singlewordSuggestions = GetTermSuggestions(searchTerm, suggestionsToRequest, combinedWord, out tempCombinedWordTermMatch);
                    // Keep the first combined-word match found.
                    if (string.IsNullOrEmpty(combinedWordTermMatch))
                    {
                        combinedWordTermMatch = tempCombinedWordTermMatch;
                    }
                }

                if (singlewordSuggestions == null)
                {
                    singlewordSuggestions = new List<string>();
                }

                if (singlewordSuggestions.Count > 0)
                {
                    foundSuggestions = true;
                }

                // Starts as the original word. Deliberately declared outside the loop: once the
                // suggestions run out, the remaining slots reuse the last suggestion that was picked
                // (the resulting duplicates are removed when the final result is built).
                var wordToAdd = searchTerm;
                for (var i = 0; i < numberOfSuggestions; i++) //For each expected result, we will add a record to the result
                {
                    if (singlewordSuggestions.Count > i)
                    {
                        if (searchTerms.Count > 1 && depth == 1)
                        {
                            // First word of a multi-word search: always use the best suggestion.
                            wordToAdd = singlewordSuggestions[0];
                        }
                        else
                        {
                            wordToAdd = singlewordSuggestions[i];
                        }
                    }

                    //Add or update the result with our finding for this single word and insert the right place
                    if (suggestionsCombined.ContainsKey(i))
                    {
                        suggestionsCombined[i] += ' ' + wordToAdd;
                    }
                    else
                    {
                        suggestionsCombined.Add(i, wordToAdd);
                    }
                }

                depth++;
            }

            if (!foundSuggestions || suggestionsCombined.Count == 0)
            {
                return Enumerable.Empty<string>();
            }

            List<string> result = new List<string>();
            // A term matching the two first words written as one goes first.
            if (!string.IsNullOrEmpty(combinedWordTermMatch))
            {
                result.Add(combinedWordTermMatch);
            }

            foreach (var suggestion in suggestionsCombined)
            {
                if (!result.Contains(suggestion.Value, StringComparer.OrdinalIgnoreCase))
                {
                    result.Add(suggestion.Value);
                }
            }

            return result.Take(numberOfSuggestions);
        }

        /// <summary>
        /// Finds suggestions for a single word: first terms of the index field that start with the
        /// word, then - if more are needed and the word has at least two characters - spell-checker
        /// candidates.
        /// </summary>
        /// <param name="word">Word to find suggestions for</param>
        /// <param name="neededSuggestions">Number of suggestions wanted</param>
        /// <param name="combinedTwoWordTerm">Optional text; a scanned term starting with it is reported through <paramref name="combinedWordTermMatch"/></param>
        /// <param name="combinedWordTermMatch">Last scanned term that starts with <paramref name="combinedTwoWordTerm"/>, or empty</param>
        /// <returns>List of suggestions (possibly empty)</returns>
        internal List<string> GetTermSuggestions(string word, int neededSuggestions, string combinedTwoWordTerm, out string combinedWordTermMatch)
        {
            combinedWordTermMatch = string.Empty;
            //Terms - find existing terms in the field that starts with the passed word
            List<string> termSuggestions = new List<string>(numberOfSuggestions);
            // The enumerator is positioned at the first term at or after (indexField, word); dispose it when done.
            using (TermEnum terms = indexReader.Terms(new Term(indexField, word)))
            {
                int maxSuggestsCpt = 0;
                do
                {
                    Term current = terms.Term;
                    // No current term means the enumeration is exhausted (e.g. the word sorts after
                    // every indexed term); a different field means we ran past the terms of our field.
                    if (current == null || !string.Equals(current.Field, indexField, StringComparison.Ordinal))
                    {
                        break;
                    }

                    var term = current.Text;
                    if (!string.IsNullOrEmpty(combinedTwoWordTerm) && term.StartsWith(combinedTwoWordTerm, System.StringComparison.OrdinalIgnoreCase))
                    {
                        combinedWordTermMatch = term;
                    }

                    if (term.StartsWith(word, System.StringComparison.OrdinalIgnoreCase))
                    {
                        if (!termSuggestions.Contains(term, StringComparer.OrdinalIgnoreCase))
                        {
                            termSuggestions.Add(term);
                            maxSuggestsCpt++;
                        }
                    }

                    //if maxSuggestsCpt = 0 means that there are no terms in this list starting with the search word - no reason to iterate
                    if (maxSuggestsCpt >= neededSuggestions || maxSuggestsCpt == 0)
                    {
                        break;
                    }
                }
                while (terms.Next());
            }

            //If there is enough suggestions or the word is one letter only
            if (termSuggestions.Count >= neededSuggestions || word.Length < 2)
            {
                return termSuggestions;
            }

            //Add suggestions to the list of existing terms
            int missingSuggestions = neededSuggestions - termSuggestions.Count;
            termSuggestions.AddRange(GetSimilarSuggestions(word, missingSuggestions));
            return termSuggestions.Distinct().ToList();
        }

        /// <summary>
        /// Asks the spell checker for words similar to <paramref name="word"/> and orders them by a
        /// combined score of document frequency and three string-distance measures, best first.
        /// </summary>
        /// <param name="word">Word to find similar words for</param>
        /// <param name="numberOfSuggestions">Number of suggestions to request from the spell checker</param>
        /// <returns>The suggested words, highest score first</returns>
        internal List<string> GetSimilarSuggestions(string word, int numberOfSuggestions)
        {
            //Suggestions
            var suggestions = checker.SuggestSimilar(word, numberOfSuggestions, indexReader, indexField, true);
            var jaro = new JaroWinklerDistance();
            var leven = new LevenshteinDistance();
            var ngram = new NGramDistance();
            return suggestions
                .Select(s => new
                {
                    suggestion = s,
                    freq = indexReader.DocFreq(new Term(indexField, s)),
                    jaro = jaro.GetDistance(word, s),
                    leven = leven.GetDistance(word, s),
                    ngram = ngram.GetDistance(word, s)
                })
                // Average of (document frequency / 10) and the three distance values.
                .OrderByDescending(metric =>
                    (
                        (metric.freq / 10f) +
                        metric.jaro +
                        metric.leven +
                        metric.ngram
                    )
                    / 4f
                )
                .Select(metric => metric.suggestion)
                .ToList();
        }

        /// <summary>
        /// Finds suggestions for <paramref name="word"/> among the terms of documents that match
        /// <paramref name="termToSearch"/>. Terms starting with the word come first, then terms
        /// starting with the spell-checked word, then any other terms of the matching documents.
        /// </summary>
        /// <param name="termToSearch">Query text (all words required) selecting the documents to take terms from</param>
        /// <param name="word">Word to find suggestions for</param>
        /// <returns>List of suggestions (possibly empty)</returns>
        internal List<string> GetTermsSuggestionsFromSearch(string termToSearch, string word)
        {
            List<string> termSuggestions = new List<string>();
            List<string> spellCheckedSuggestions = new List<string>(numberOfSuggestions);
            List<string> fallbacTermSuggestions = new List<string>(numberOfSuggestions);
            Analyzer analyzer = new mylucene.Analysis.Standard.StandardAnalyzer(mylucene.Util.Version.LUCENE_30);
            var parser = new MultiFieldQueryParser(mylucene.Util.Version.LUCENE_30, new[] { indexField }, analyzer);
            parser.DefaultOperator = QueryParser.Operator.AND;
            var query = parser.Parse(termToSearch);
            var booleanQuery = new BooleanQuery();
            booleanQuery.Add(query, Occur.MUST);
            var filter = new QueryWrapperFilter(booleanQuery);
            using (Searcher searcher = new IndexSearcher(indexReader))
            {
                // Best spelling correction for the word, if any.
                string spellCheckedWord = string.Empty;
                var spellingSuggestion = GetSimilarSuggestions(word, 1);
                if (spellingSuggestion != null)
                {
                    spellCheckedWord = spellingSuggestion.FirstOrDefault();
                }

                TopScoreDocCollector collector = TopScoreDocCollector.Create(25, true);
                searcher.Search(booleanQuery, filter, collector);
                var hits = collector.TopDocs().ScoreDocs;
                for (int i = 0; i < hits.Length; i++)
                {
                    ITermFreqVector vector = indexReader.GetTermFreqVector(hits[i].Doc, indexField);
                    // No term vector available for this document/field - nothing to take terms from.
                    if (vector == null)
                    {
                        continue;
                    }

                    var vectorTerms = vector.GetTerms();
                    var termCounts = vector.GetTermFrequencies();
                    if (vectorTerms == null || termCounts == null)
                    {
                        continue;
                    }

                    //Get all terms and sort them by frequency - one document at the time.
                    List<TermFrequency> termFrequencies = new List<TermFrequency>(vectorTerms.Length);
                    for (int termArrayPointer = 0; termArrayPointer < vectorTerms.Length; termArrayPointer++)
                    {
                        termFrequencies.Add(new TermFrequency(termCounts[termArrayPointer], vectorTerms[termArrayPointer]));
                    }

                    foreach (var term in termFrequencies.OrderByDescending(o => o.Frequency))
                    {
                        if (term.Term.StartsWith(word, StringComparison.OrdinalIgnoreCase))
                        {
                            termSuggestions.Add(term.Term);
                        }
                        else if (!string.IsNullOrEmpty(spellCheckedWord) && term.Term.StartsWith(spellCheckedWord, StringComparison.OrdinalIgnoreCase))
                        {
                            spellCheckedSuggestions.Add(term.Term);
                        }
                        else if (fallbacTermSuggestions.Count < numberOfSuggestions)
                        {
                            fallbacTermSuggestions.Add(term.Term);
                        }
                    }

                    // Enough direct matches - no need to look at more documents.
                    if (termSuggestions.Count > numberOfSuggestions)
                    {
                        break;
                    }
                }
            }

            if (termSuggestions.Count < numberOfSuggestions)
            {
                //Suggestions are missing. Add fallback suggestions: spell-checked matches first, then other terms.
                termSuggestions.AddRange(spellCheckedSuggestions.Take(numberOfSuggestions - termSuggestions.Count));
                termSuggestions.AddRange(fallbacTermSuggestions.Take(numberOfSuggestions - termSuggestions.Count));
            }

            return termSuggestions;
        }

        /// <summary>
        /// A term paired with the number of times it occurs in one document.
        /// </summary>
        internal class TermFrequency
        {
            /// <summary>Number of occurrences of the term.</summary>
            public int Frequency;
            /// <summary>The term text.</summary>
            public string Term;

            /// <summary>
            /// Creates a new term/frequency pair.
            /// </summary>
            /// <param name="frequency">Number of occurrences of the term</param>
            /// <param name="term">The term text</param>
            public TermFrequency(int frequency, string term)
            {
                Frequency = frequency;
                Term = term;
            }
        }
    }