If I have a list of strings, for example:
["car", "tree", "boy", "girl", "arc", ...]
What should I do in order to find the anagrams in that list?
This works fine:
def find_ana(l):
    """Return the words in *l* that have at least one anagram elsewhere in *l*.

    Two entries count as anagrams when they are different words made of the
    same characters. Repeated copies of the same word are not anagrams of
    each other.

    Args:
        l: Sequence of words.

    Returns:
        A list of the matching words, without duplicates, in order of first
        appearance in *l*.
    """
    # Group the distinct words by their sorted characters: one pass over the
    # input instead of comparing (and re-sorting) every pair of words.
    groups = {}
    for word in l:
        groups.setdefault(tuple(sorted(word)), set()).add(word)

    result = []
    emitted = set()
    for word in l:
        if word in emitted:
            continue
        # A group with more than one distinct word means this word has an anagram.
        if len(groups[tuple(sorted(word))]) > 1:
            emitted.add(word)
            result.append(word)
    return result
Create a dictionary that maps each sorted word to the list of words that sort to it. All the words in the same list are anagrams of each other.
from collections import defaultdict
def load_words(filename='/usr/share/dict/american-english'):
    """Yield the lines of *filename* one at a time, with trailing whitespace removed.

    Args:
        filename: Path of a text file; defaults to the system dictionary path.
    """
    with open(filename) as handle:
        # readline() returns "" only at end of file, which stops the iteration.
        for raw_line in iter(handle.readline, ""):
            stripped = raw_line.rstrip()
            yield stripped
def get_anagrams(source):
    """Group the words of *source* by their letters.

    Args:
        source: Iterable of words.

    Returns:
        A ``defaultdict(list)`` whose keys are the words' characters in sorted
        order (joined into a string) and whose values are the words sharing
        that key, in the order they were read.
    """
    groups = defaultdict(list)
    for entry in source:
        signature = sorted(entry)
        groups["".join(signature)].append(entry)
    return groups
def print_anagrams(word_source):
    """Print every group of two or more mutual anagrams found in *word_source*.

    Args:
        word_source: Iterable of words; it is passed to ``get_anagrams``.

    Each printed line shows the sorted-letter key and the list of words that
    share it. Groups containing a single word are skipped.
    """
    d = get_anagrams(word_source)
    # dict.iteritems() exists only on Python 2; items() works on Python 2 and 3.
    for key, anagrams in d.items():
        if len(anagrams) > 1:
            print(key, anagrams)
# Read the words from load_words()'s default dictionary file and print the
# anagram groups found in it.
word_source = load_words()
print_anagrams(word_source)
Or:
# Same call with an in-memory list instead of a file: "car" and "arc" share
# the sorted-letter key "acr", so they form the only group that is printed.
word_source = ["car", "tree", "boy", "girl", "arc"]
print_anagrams(word_source)
Here is an example that works on a list of 1,000,000 words.
namespace WindowsFormsApplication2
{
    /// <summary>
    /// A word together with the values used to compare it against other words.
    /// </summary>
    public class WordDef
    {
        /// <summary>The word itself.</summary>
        public string Word { get; set; }

        /// <summary>Character-code sum of the word; callers fill it from <see cref="GetAsciiSum"/>.</summary>
        public int WordSum { get; set; }

        /// <summary>Number of characters in the word (assigned by the caller).</summary>
        public int Length { get; set; }

        /// <summary>A word that was found to be an anagram of <see cref="Word"/>.</summary>
        public string AnagramWord { get; set; }

        /// <summary>The characters of the word in sorted order (assigned by the caller).</summary>
        public string Ordered { get; set; }

        /// <summary>
        /// Adds up the numeric character codes of every character in <paramref name="word"/>.
        /// </summary>
        /// <param name="word">The text whose characters are summed.</param>
        /// <returns>The sum of the character codes; 0 for an empty string.</returns>
        public int GetAsciiSum(string word)
        {
            int total = 0;
            for (int index = 0; index < word.Length; index++)
            {
                total += word[index];
            }
            return total;
        }
    }
}
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace WindowsFormsApplication2
{
public partial class AngramTestForm : Form
{
// Words processed by the calculation methods; button1_Click points this at m_CacheWords.
private ConcurrentBag<string> m_Words;
// Words returned by GetWords(); filled on the first click and reused on later clicks.
private ConcurrentBag<string> m_CacheWords;
// Anagram pairs (Word / AnagramWord) added by the calculation methods; recreated on every click.
private ConcurrentBag<WordDef> m_Anagramlist;
/// <summary>
/// Calls InitializeComponent() (defined outside this view) and creates the empty word cache.
/// </summary>
public AngramTestForm()
{
InitializeComponent();
m_CacheWords = new ConcurrentBag<string>();
}
/// <summary>
/// Click handler: loads the word list (fetched once, then cached), runs the
/// anagram search, and shows the elapsed time and the pairs that were found.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Start every run with an empty result bag. The previous null assignments
    // and the throw-away m_Words bag were dead stores and have been removed.
    m_Anagramlist = new ConcurrentBag<WordDef>();

    // Fetch the word list only while the cache is empty; later clicks reuse it.
    if (m_CacheWords.Count == 0)
    {
        foreach (var s in GetWords())
        {
            m_CacheWords.Add(s);
        }
    }
    m_Words = m_CacheWords;

    // Only the calculation itself is timed, not the download above.
    Stopwatch sw = Stopwatch.StartNew();
    //DirectCalculation();   // brute-force alternative to the call below
    AsciiCalculation();
    sw.Stop();

    Console.WriteLine("The End! {0}", sw.ElapsedMilliseconds);
    this.Invoke((MethodInvoker)delegate
    {
        lbResult.Text = string.Concat(sw.ElapsedMilliseconds.ToString(), " Miliseconds");
    });

    // Append the pieces directly instead of building a temporary string per line.
    StringBuilder sb = new StringBuilder();
    foreach (var w in m_Anagramlist)
    {
        if (w != null)
        {
            sb.Append(w.Word).Append(" - ").Append(w.AnagramWord).AppendLine();
        }
    }
    txResult.Text = sb.ToString();
}
/// <summary>
/// Brute-force search: builds a WordDef for every word in m_Words and compares
/// each one with every other. For every ordered pair of different words whose
/// sorted characters match, an entry is added to m_Anagramlist.
/// </summary>
private void DirectCalculation()
{
    var entries = new List<WordDef>();
    foreach (string word in m_Words)
    {
        var entry = new WordDef
        {
            Word = word,
            Length = word.Length,
            Ordered = String.Concat(word.OrderBy(c => c))
        };
        entry.WordSum = entry.GetAsciiSum(word);
        entries.Add(entry);
    }

    foreach (WordDef first in entries)
    {
        foreach (WordDef second in entries)
        {
            // Skip identical words and words whose sorted characters differ.
            if (first.Word == second.Word || first.Ordered != second.Ordered)
            {
                continue;
            }

            second.AnagramWord = first.Word;
            m_Anagramlist.Add(new WordDef() { Word = first.Word, AnagramWord = second.Word });
        }
    }
}
/// <summary>
/// Builds a WordDef for every word in m_Words in parallel, then groups them
/// by length and, within each length, by character-code sum. Every sum group
/// holding more than one word is handed to CheckCandidates.
/// </summary>
private void AsciiCalculation()
{
    var entries = new ConcurrentBag<WordDef>();
    Parallel.ForEach(m_Words, word =>
    {
        var entry = new WordDef();
        entry.Word = word;
        entry.WordSum = entry.GetAsciiSum(word);
        entry.Length = word.Length;
        entry.Ordered = String.Concat(word.OrderBy(c => c));
        entries.Add(entry);
    });

    var groupsByLength = entries
        .GroupBy(entry => entry.Length)
        .OrderBy(lengthGroup => lengthGroup.Key);

    foreach (var lengthGroup in groupsByLength)
    {
        List<WordDef> sameLength = lengthGroup.ToList<WordDef>();
        var groupsBySum = sameLength
            .GroupBy(entry => entry.WordSum)
            .OrderBy(sumGroup => sumGroup.Key);

        Parallel.ForEach(groupsBySum, sumGroup =>
        {
            List<WordDef> candidates = sumGroup.ToList<WordDef>();
            // A group with a single word has nothing to be compared against.
            if (candidates.Count > 1)
            {
                CheckCandidates(candidates);
            }
        });
    }
}
/// <summary>
/// Compares every unordered pair of entries in <paramref name="we"/>. When two
/// different words have the same sorted characters, the later entry's
/// AnagramWord is set to the earlier word and the pair is added to m_Anagramlist.
/// </summary>
/// <param name="we">Candidate entries to compare with each other.</param>
private void CheckCandidates(List<WordDef> we)
{
    for (int i = 0; i < we.Count; i++)
    {
        for (int j = i + 1; j < we.Count; j++)
        {
            WordDef head = we[i];
            WordDef other = we[j];

            bool isAnagramPair = head.Word != other.Word && head.Ordered == other.Ordered;
            if (!isAnagramPair)
            {
                continue;
            }

            other.AnagramWord = head.Word;
            m_Anagramlist.Add(new WordDef() { Word = head.Word, AnagramWord = other.Word });
        }
    }
}
/// <summary>
/// Downloads the word list from the hard-coded URL and returns its non-empty lines.
/// </summary>
/// <returns>One array element per non-empty line of the downloaded text.</returns>
private static string[] GetWords()
{
    string htmlCode;
    using (WebClient client = new WebClient())
    {
        htmlCode = client.DownloadString("https://raw.githubusercontent.com/danielmiessler/SecLists/master/Passwords/10_million_password_list_top_1000000.txt");
    }

    // Split on CR as well as LF so that CRLF line endings do not leave a
    // trailing '\r' on every word. LF-only input gives the same result as before.
    return htmlCode.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
}
}
}
In order to do this for 2 strings you can do this:
def isAnagram(str1, str2):
    """Return True when *str1* and *str2* consist of exactly the same characters.

    Both arguments are sorted character by character and the sorted results
    are compared.
    """
    return sorted(str1) == sorted(str2)
As for the iteration over the list, it is pretty straightforward.
here is the impressive solution.
funct alphabet_count_mapper:
for each word in the file/list
1. Create a dictionary of alphabet characters with an initial count of 0.
2. Count each character in the word, incrementing its entry in the dictionary above.
3. Return the tuple of the dictionary's values (the character counts).
funct anagram_counter:
1. Create a dictionary with the character-count tuple as key and the number of occurrences as value.
2. Iterate over that dictionary and, if a value is > 1, add it to the anagram count.
import sys
# Number of words seen per letter-count signature; filled by anagram_counter().
words_count_map_dict = {}
# Read one word per line from the file named by the first command-line
# argument. The with-block closes the file, and stripping the newline per
# line keeps the last word even when the file has no trailing newline.
with open(sys.argv[1], "r") as fobj:
    words = [line.rstrip('\n') for line in fobj]
def alphabet_count_mapper(word):
    """Return a hashable signature of the character counts of *word*.

    Two words get the same signature exactly when they contain the same
    characters the same number of times. Matching is case-sensitive.

    Args:
        word: The word to analyse.

    Returns:
        A tuple of the 26 counts for 'a'..'z'. If the word contains any other
        characters, the tuple is extended with sorted ``(character, count)``
        pairs for them.
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    alpha_count_dict = dict.fromkeys(letters, 0)
    # Characters outside a-z get their own counts. The previous code stored
    # them under the literal key "alpha" with value 0, so they were ignored
    # and different words could receive the same signature.
    other_count_dict = {}
    for alpha in word:
        if alpha in alpha_count_dict:
            alpha_count_dict[alpha] += 1
        else:
            other_count_dict[alpha] = other_count_dict.get(alpha, 0) + 1
    # Read the counts in explicit a..z order rather than relying on dict order.
    signature = tuple(alpha_count_dict[letter] for letter in letters)
    if other_count_dict:
        signature += tuple(sorted(other_count_dict.items()))
    return signature
def anagram_counter(words):
    """Count the words whose letter-count signature is shared with another word.

    Args:
        words: Iterable of words.

    Returns:
        The total number of words belonging to a signature that has been seen
        more than once.

    NOTE: the tallies live in the module-level ``words_count_map_dict``, so
    they accumulate across calls.
    """
    for word in words:
        temp_mapper = alphabet_count_mapper(word)
        # Look the key up in the dict itself: ``in d.keys()`` builds a list on
        # Python 2 and scans it linearly for every word.
        words_count_map_dict[temp_mapper] = words_count_map_dict.get(temp_mapper, 0) + 1
    return sum(val for val in words_count_map_dict.values() if val > 1)
# Parenthesised call so the line is valid on both Python 2 and Python 3.
print(anagram_counter(words))
Run it with the file path as a command-line argument.
Most of the previous answers are correct; here is another way to compare two strings. The main benefit of this strategy over sorting is time complexity: building a frequency dictionary is O(n), whereas sorting is O(n log n).
1. Check that the two strings have the same length.
2. Build a frequency dictionary for each string and compare them; if they match, the words are anagrams.
def char_frequency(word):
    """Return a dict mapping each character of *word* to its number of occurrences."""
    counts = {}
    for symbol in word:
        # A character seen for the first time starts from 0 and becomes 1.
        counts[symbol] = counts.get(symbol, 0) + 1
    return counts
a_word = 'google'
b_word = 'ooggle'
# Words of different lengths can never be anagrams, so test that first and
# compare the character frequencies only when the lengths match.
if len(a_word) != len(b_word):
    print("not anagram")
elif char_frequency(a_word) == char_frequency(b_word):
    print("found anagram")
else:
    print("no anagram")