Using Python, find anagrams for a list of words

前端 未结 22 880
失恋的感觉
失恋的感觉 2020-12-13 01:11

If I have a list of strings for example:

["car", "tree", "boy", "girl", "arc"...]

What should I do in order to find anagrams in this list?

22条回答
  •  温柔的废话
    2020-12-13 01:35

    1. Calculate the length of each word.
    2. Calculate the sum of the ASCII character values of each word.
    3. Sort the characters of each word by their ASCII values and store the result as the word's ordered form.
    4. Group the words by length.
    5. Within each length group, regroup the words by their ASCII character sum.
    6. Within each of these small groups, compare only the ordered forms. If the ordered forms of two words are the same, the words are anagrams.

    Here we have a list of 1,000,000 words.

        namespace WindowsFormsApplication2
        {
            /// <summary>
            /// Plain data holder for a word and the values derived from it
            /// while searching for anagrams.
            /// </summary>
            public class WordDef
            {
                /// <summary>The word itself.</summary>
                public string Word { get; set; }

                /// <summary>Slot for the character-code sum of the word (see <see cref="GetAsciiSum"/>).</summary>
                public int WordSum { get; set; }

                /// <summary>Slot for the number of characters in the word.</summary>
                public int Length { get; set; }

                /// <summary>Slot for a word found to be an anagram of <see cref="Word"/>.</summary>
                public string AnagramWord { get; set; }

                /// <summary>Slot for the word's characters in sorted order.</summary>
                public string Ordered { get; set; }

                /// <summary>
                /// Adds up the numeric code of every character in <paramref name="word"/>.
                /// </summary>
                /// <param name="word">The text whose characters are summed.</param>
                /// <returns>The total of all character codes; 0 for an empty string.</returns>
                public int GetAsciiSum(string word)
                {
                    int total = 0;
                    for (int index = 0; index < word.Length; index++)
                    {
                        // char converts implicitly to its numeric code.
                        total += word[index];
                    }
                    return total;
                }
            }
        }
    
        using System;
        using System.Collections.Concurrent;
        using System.Collections.Generic;
        using System.Diagnostics;
        using System.Linq;
        using System.Net;
        using System.Text;
        using System.Threading.Tasks;
        using System.Windows.Forms;
    
        namespace WindowsFormsApplication2
        {
            /// <summary>
            /// Demo form that times anagram detection over a downloaded word list.
            /// </summary>
            /// <remarks>
            /// Two words are reported as anagrams when their sorted characters are equal
            /// and the words themselves differ. <see cref="AsciiCalculation"/> narrows the
            /// candidates by length and by character-code sum before comparing sorted
            /// forms; <see cref="DirectCalculation"/> is the brute-force baseline.
            /// </remarks>
            public partial class AngramTestForm : Form
            {
                // Word list used by the current run (refers to the cached bag).
                private ConcurrentBag<string> m_Words;

                // Downloaded words, kept so later clicks do not download again.
                private ConcurrentBag<string> m_CacheWords;

                // Anagram pairs found by the current run; CheckCandidates adds to it
                // from inside Parallel.ForEach, hence the concurrent collection.
                private ConcurrentBag<WordDef> m_Anagramlist;

                /// <summary>Creates the form with an empty word cache.</summary>
                public AngramTestForm()
                {
                    InitializeComponent();
                    m_CacheWords = new ConcurrentBag<string>();
                }

                /// <summary>
                /// Runs the anagram search, shows the elapsed time in <c>lbResult</c>
                /// and the found pairs (one "word - anagram" line each) in <c>txResult</c>.
                /// </summary>
                private void button1_Click(object sender, EventArgs e)
                {
                    // Every run starts with an empty result set.
                    m_Anagramlist = new ConcurrentBag<WordDef>();

                    // Download the word list only while the cache is still empty.
                    if (m_CacheWords.IsEmpty)
                    {
                        foreach (var s in GetWords())
                        {
                            m_CacheWords.Add(s);
                        }
                    }

                    m_Words = m_CacheWords;

                    Stopwatch sw = Stopwatch.StartNew();

                    // Swap the two calls to time the brute-force baseline instead.
                    //DirectCalculation();

                    AsciiCalculation();

                    sw.Stop();

                    Console.WriteLine("The End! {0}", sw.ElapsedMilliseconds);

                    this.Invoke((MethodInvoker)delegate
                    {
                        lbResult.Text = string.Concat(sw.ElapsedMilliseconds.ToString(), " Miliseconds");
                    });

                    StringBuilder sb = new StringBuilder();
                    foreach (var w in m_Anagramlist)
                    {
                        if (w != null)
                        {
                            // Chained appends avoid a temporary string per line.
                            sb.Append(w.Word)
                              .Append(" - ")
                              .Append(w.AnagramWord)
                              .Append(Environment.NewLine);
                        }
                    }

                    txResult.Text = sb.ToString();
                }

                /// <summary>
                /// Builds the <see cref="WordDef"/> for one word: the word, its
                /// character-code sum, its length and its characters in sorted order.
                /// </summary>
                private static WordDef CreateWordDef(string word)
                {
                    WordDef wd = new WordDef();
                    wd.Word = word;
                    wd.WordSum = wd.GetAsciiSum(word);
                    wd.Length = word.Length;
                    wd.Ordered = String.Concat(word.OrderBy(c => c));
                    return wd;
                }

                /// <summary>
                /// Brute-force baseline: compares every word with every other word and
                /// records each match in <c>m_Anagramlist</c>. Because both (a, b) and
                /// (b, a) are visited, every anagram pair is recorded in both directions.
                /// </summary>
                private void DirectCalculation()
                {
                    List<WordDef> wordDef = new List<WordDef>();

                    foreach (var w in m_Words)
                    {
                        wordDef.Add(CreateWordDef(w));
                    }

                    foreach (var w in wordDef)
                    {
                        foreach (var t in wordDef)
                        {
                            // Identical words are not reported as anagrams of each other.
                            if (w.Word != t.Word && w.Ordered == t.Ordered)
                            {
                                t.AnagramWord = w.Word;
                                m_Anagramlist.Add(new WordDef() { Word = w.Word, AnagramWord = t.Word });
                            }
                        }
                    }
                }

                /// <summary>
                /// Finds anagram pairs by first grouping the words by length, then by
                /// character-code sum, and only comparing sorted forms inside each of
                /// those small groups. Matches are added to <c>m_Anagramlist</c>.
                /// </summary>
                private void AsciiCalculation()
                {
                    ConcurrentBag<WordDef> wordDef = new ConcurrentBag<WordDef>();

                    // Derive length, sum and sorted form of every word in parallel.
                    Parallel.ForEach(m_Words, w => wordDef.Add(CreateWordDef(w)));

                    var tempWordByLength = wordDef
                        .GroupBy(w => w.Length)
                        .OrderBy(g => g.Key);

                    foreach (var wList in tempWordByLength)
                    {
                        var tempWordsBySum = wList
                            .GroupBy(w => w.WordSum)
                            .OrderBy(g => g.Key);

                        // Each sum group holds a disjoint set of words, so the groups
                        // are checked in parallel.
                        Parallel.ForEach(tempWordsBySum, ws =>
                            {
                                List<WordDef> we = ws.ToList();

                                // A single word has nothing to be compared with.
                                if (we.Count > 1)
                                {
                                    CheckCandidates(we);
                                }
                            });
                    }
                }

                /// <summary>
                /// Compares every unordered pair in <paramref name="we"/> and records
                /// those whose sorted forms match while the words differ.
                /// </summary>
                /// <param name="we">Candidate words sharing the same length and character-code sum.</param>
                private void CheckCandidates(List<WordDef> we)
                {
                    for (int i = 0; i < we.Count; i++)
                    {
                        // Start at i + 1 so each pair is visited once.
                        for (int j = i + 1; j < we.Count; j++)
                        {
                            if (we[i].Word != we[j].Word && we[i].Ordered == we[j].Ordered)
                            {
                                we[j].AnagramWord = we[i].Word;
                                m_Anagramlist.Add(new WordDef() { Word = we[i].Word, AnagramWord = we[j].Word });
                            }
                        }
                    }
                }

                /// <summary>
                /// Downloads the word list and splits it into its non-empty lines.
                /// </summary>
                /// <returns>One array entry per non-empty "\n"-separated line of the download.</returns>
                private static string[] GetWords()
                {
                    string htmlCode;

                    using (WebClient client = new WebClient())
                    {
                        htmlCode = client.DownloadString("https://raw.githubusercontent.com/danielmiessler/SecLists/master/Passwords/10_million_password_list_top_1000000.txt");
                    }

                    return htmlCode.Split(new string[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);
                }
            }
        }
    

提交回复
热议问题