How to find a word from arrays of characters?

前端 未结 4 1442
感动是毒
感动是毒 2020-12-14 04:55

What is the best way to solve this:

I have a group of arrays with 3-4 characters inside each like so:

{p,     {a,    {t,    {m,
 q,      b,     u,     n,
 r,      c}     v}     o}
 s}


        
4条回答
  •  刺人心
    刺人心 (楼主)
    2020-12-14 05:14

    I had some Scrabble code lying around, so I was able to throw this together. The dictionary I used is SOWPODS (267,751 words). The code below reads the dictionary as a text file with one uppercase word on each line.

    The code is C#:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.IO;
    using System.Diagnostics;
    
    namespace SO_6022848
    {
      /// <summary>
      /// One letter of the alphabet in <see cref="Chars"/>, stored as its
      /// zero-based position in that string.
      /// </summary>
      /// <remarks>
      /// Implements <see cref="IEquatable{T}"/> so that using a Letter as a
      /// dictionary key or hash-set element compares the index directly instead
      /// of going through the boxing <c>ValueType.Equals</c> fallback.
      /// </remarks>
      public struct Letter : IEquatable<Letter>
      {
        /// <summary>The recognised characters; a letter's index is its position here.</summary>
        public const string Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        /// <summary>
        /// Converts a character to a Letter. A character that is not in
        /// <see cref="Chars"/> (lowercase, digits, punctuation) yields
        /// <c>Index == -1</c>, because that is what <c>IndexOf</c> returns.
        /// </summary>
        public static implicit operator Letter(char c)
        {
          return new Letter() { Index = Chars.IndexOf(c) };
        }

        /// <summary>Zero-based position in <see cref="Chars"/>, or -1 for an unrecognised character.</summary>
        public int Index;

        /// <summary>
        /// Returns the character for this letter. Throws
        /// <see cref="IndexOutOfRangeException"/> when <see cref="Index"/> is
        /// outside <see cref="Chars"/>.
        /// </summary>
        public char ToChar()
        {
          return Chars[Index];
        }

        /// <summary>Returns the letter as a one-character string (same failure mode as <see cref="ToChar"/>).</summary>
        public override string ToString()
        {
          return Chars[Index].ToString();
        }

        /// <summary>Two letters are equal when their indices are equal.</summary>
        public bool Equals(Letter other)
        {
          return Index == other.Index;
        }

        /// <summary>Untyped equality; delegates to <see cref="Equals(Letter)"/>.</summary>
        public override bool Equals(object obj)
        {
          return obj is Letter && Equals((Letter)obj);
        }

        /// <summary>The index is already a perfect hash for the letter.</summary>
        public override int GetHashCode()
        {
          return Index;
        }
      }
    
      /// <summary>
      /// Prefix tree built from a word list. Each edge is labelled with a
      /// <see cref="Letter"/>; following the edges for a word's characters from
      /// <see cref="Root"/> reaches the node that stores that word.
      /// </summary>
      public class Trie
      {
        /// <summary>A single trie node.</summary>
        public class Node
        {
          /// <summary>The complete word that ends at this node, or null if no word ends here.</summary>
          public string Word;

          /// <summary>True when a word ends at this node.</summary>
          public bool IsTerminal { get { return Word != null; } }

          /// <summary>Child nodes keyed by the letter that leads to them.</summary>
          public Dictionary<Letter, Node> Edges = new Dictionary<Letter, Node>();
        }

        /// <summary>The empty-prefix node every word starts from.</summary>
        public Node Root = new Node();

        /// <summary>
        /// Builds the trie by inserting every entry of <paramref name="words"/>.
        /// Null or empty entries are skipped.
        /// </summary>
        /// <param name="words">Words to insert; each character is converted to a <see cref="Letter"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
        public Trie(string[] words)
        {
          if (words == null)
          {
            throw new ArgumentNullException("words");
          }

          foreach (var word in words)
          {
            if (string.IsNullOrEmpty(word))
            {
              continue;
            }

            var node = Root;
            foreach (char c in word)
            {
              Node next;
              if (!node.Edges.TryGetValue(c, out next))
              {
                next = new Node();
                node.Edges.Add(c, next);
              }
              node = next;
            }

            // Mark the final node even when it already existed. Otherwise a
            // word that is a prefix of an earlier-inserted word (e.g. "PA"
            // after "PAT") would never become terminal. The first word to
            // claim a node keeps it.
            if (node.Word == null)
            {
              node.Word = word;
            }
          }
        }

      }
    
      /// <summary>
      /// Benchmark driver: finds dictionary words that can be spelled by taking
      /// one letter from each of several consecutive letter sets.
      /// </summary>
      class Program
      {
        /// <summary>
        /// Walks the trie from <paramref name="n"/>, at each step following only
        /// edges whose letter is in the set at <paramref name="currentArrayIndex"/>,
        /// then recursing with the next set. Every terminal node reached has its
        /// word appended to <paramref name="wordsFound"/>.
        /// </summary>
        /// <param name="n">Trie node to continue from.</param>
        /// <param name="sets">Allowed letters per position.</param>
        /// <param name="currentArrayIndex">Index into <paramref name="sets"/> for the next letter; recursion stops once it reaches <c>sets.Length</c>.</param>
        /// <param name="wordsFound">Receives the matching words (duplicates are not removed).</param>
        static void GenWords(Trie.Node n, HashSet<Letter>[] sets, int currentArrayIndex, List<string> wordsFound)
        {
          if (currentArrayIndex >= sets.Length)
          {
            return;
          }

          var allowed = sets[currentArrayIndex];
          foreach (var edge in n.Edges)
          {
            if (!allowed.Contains(edge.Key))
            {
              continue;
            }

            var child = edge.Value;
            if (child.IsTerminal)
            {
              wordsFound.Add(child.Word);
            }
            GenWords(child, sets, currentArrayIndex + 1, wordsFound);
          }
        }

        /// <summary>
        /// Loads "sowpods.txt" into a trie, runs the search for a number of
        /// trials (on random sets or on the fixed example sets, depending on
        /// <c>generateRandomInput</c>), and prints the average search time and
        /// the average number of words found per trial.
        /// </summary>
        static void Main(string[] args)
        {
          const int minArraySize = 3;
          const int maxArraySize = 4;
          const int setCount = 10;
          const bool generateRandomInput = true;

          var trie = new Trie(File.ReadAllLines("sowpods.txt"));
          var watch = new Stopwatch();
          var trials = 10000;
          var wordCountSum = 0;
          var rand = new Random(37); // fixed seed so runs are repeatable

          // The example input from the question. It never changes, so build it once.
          var fixedSets = new HashSet<Letter>[] {
            new HashSet<Letter>(new Letter[] { 'P', 'Q', 'R', 'S' }),
            new HashSet<Letter>(new Letter[] { 'A', 'B', 'C' }),
            new HashSet<Letter>(new Letter[] { 'T', 'U', 'V' }),
            new HashSet<Letter>(new Letter[] { 'M', 'N', 'O' }) };

          for (int t = 0; t < trials; t++)
          {
            HashSet<Letter>[] sets;
            if (generateRandomInput)
            {
              sets = new HashSet<Letter>[setCount];
              for (int i = 0; i < setCount; i++)
              {
                sets[i] = new HashSet<Letter>();
                var size = minArraySize + rand.Next(maxArraySize - minArraySize + 1);
                // Add() ignores duplicates, so keep drawing until the set has `size` distinct letters.
                while (sets[i].Count < size)
                {
                  sets[i].Add(Letter.Chars[rand.Next(Letter.Chars.Length)]);
                }
              }
            }
            else
            {
              sets = fixedSets;
            }

            // Only the search itself is timed; the stopwatch accumulates across trials.
            watch.Start();
            var wordsFound = new List<string>();
            // A word may start at any set except the last one.
            for (int i = 0; i < sets.Length - 1; i++)
            {
              GenWords(trie.Root, sets, i, wordsFound);
            }
            watch.Stop();
            wordCountSum += wordsFound.Count;
            if (!generateRandomInput && t == 0)
            {
              foreach (var word in wordsFound)
              {
                Console.WriteLine(word);
              }
            }
          }
          Console.WriteLine("Elapsed per trial = {0}", new TimeSpan(watch.Elapsed.Ticks / trials));
          Console.WriteLine("Average word count per trial = {0:0.0}", (float)wordCountSum / trials);
        }

      }
    
    }
    

    Here is the output when using your test data:

    PA
    PAT
    PAV
    QAT
    RAT
    RATO
    RAUN
    SAT
    SAU
    SAV
    SCUM
    AT
    AVO
    BUM
    BUN
    CUM
    TO
    UM
    UN
    Elapsed per trial = 00:00:00.0000725
    Average word count per trial = 19.0
    

    And the output when using random data (does not print each word):

    Elapsed per trial = 00:00:00.0002910
    Average word count per trial = 62.2
    

    EDIT: I made it much faster with two changes: storing the word at each terminal node of the trie, so that it doesn't have to be rebuilt, and storing the input letters as an array of hash sets instead of an array of arrays, so that the Contains() call is fast.

提交回复
热议问题