例如,如果我有一个字符串列表:
["car", "tree", "boy", "girl", "arc"...]
我应该怎么做才能在该列表中找到字谜?例如(car, arc)
. 我尝试对每个字符串使用 for 循环,并使用if
它来忽略不同长度的字符串,但我无法得到正确的结果。
如何遍历字符串中的每个字母并以不同的顺序将其与列表中的其他字母进行比较?
我已经阅读了几个类似的问题,但答案太高级了。我不能导入任何东西,我只能使用基本功能。
In order to do this for 2 strings you can do this:
def isAnagram(str1, str2):
str1_list = list(str1)
str1_list.sort()
str2_list = list(str2)
str2_list.sort()
return (str1_list == str2_list)
As for the iteration on the list, it is pretty straight forward
Create a dictionary of (sorted word, list of word). All the words that are in the same list are anagrams of each other.
from collections import defaultdict
def load_words(filename='/usr/share/dict/american-english'):
with open(filename) as f:
for word in f:
yield word.rstrip()
def get_anagrams(source):
d = defaultdict(list)
for word in source:
key = "".join(sorted(word))
d[key].append(word)
return d
def print_anagrams(word_source):
d = get_anagrams(word_source)
for key, anagrams in d.iteritems():
if len(anagrams) > 1:
print(key, anagrams)
word_source = load_words()
print_anagrams(word_source)
Or:
word_source = ["car", "tree", "boy", "girl", "arc"]
print_anagrams(word_source)
一种解决方案是对您正在搜索字谜的单词进行排序(例如使用sorted
),对替代项进行排序并进行比较。
因此,如果您要在 list 中搜索 'rac' 的字谜['car', 'girl', 'tofu', 'rca']
,您的代码可能如下所示:
word = sorted('rac')
alternatives = ['car', 'girl', 'tofu', 'rca']
for alt in alternatives:
if word == sorted(alt):
print alt
这个问题有多种解决方案:
经典方法
首先,让我们考虑什么定义了字谜:如果两个单词由相同的一组字母组成,并且每个字母在两个单词中出现的数字或时间完全相同,那么它们就是彼此的字谜。这基本上是每个单词的字母计数的直方图。这是collections.Counter
数据结构的完美用例(请参阅文档)。算法如下:
这是代码:
from collections import Counter, defaultdict
def anagram(words):
anagrams = defaultdict(list)
for word in words:
histogram = tuple(Counter(word).items()) # build a hashable histogram
anagrams[histogram].append(word)
return list(anagrams.values())
keywords = ("hi", "hello", "bye", "helol", "abc", "cab",
"bac", "silenced", "licensed", "declines")
print(anagram(keywords))
请注意,构造Counter
is O(l)
,而对每个单词进行排序是O(n*log(l))
其中 l 是单词的长度。
使用素数解决字谜
这是一个更高级的解决方案,它依赖于素数的“乘法唯一性”。您可以参考这篇 SO 帖子:Comparing anagrams using prime numbers,这里是一个示例 python 实现。
Sort each element then look for duplicates. There's a built-in function for sorting so you do not need to import anything
由于您无法导入任何内容,因此这里有两种不同的方法,包括您要求的 for 循环。
方法 1:For 循环和内置排序函数
word_list = ["percussion", "supersonic", "car", "tree", "boy", "girl", "arc"]
# initialize a list
anagram_list = []
for word_1 in word_list:
for word_2 in word_list:
if word_1 != word_2 and (sorted(word_1)==sorted(word_2)):
anagram_list.append(word_1)
print(anagram_list)
方法2:字典
def freq(word):
freq_dict = {}
for char in word:
freq_dict[char] = freq_dict.get(char, 0) + 1
return freq_dict
# initialize a list
anagram_list = []
for word_1 in word_list:
for word_2 in word_list:
if word_1 != word_2 and (freq(word_1) == freq(word_2)):
anagram_list.append(word_1)
print(anagram_list)
如果您想更详细地解释这些方法,这里有一篇文章。
def all_anagrams(words: [str]) -> [str]:
word_dict = {}
for word in words:
sorted_word = "".join(sorted(word))
if sorted_word in word_dict:
word_dict[sorted_word].append(word)
else:
word_dict[sorted_word] = [word]
return list(word_dict.values())
以前的大多数答案都是正确的,这是比较两个字符串的另一种方法。与排序相比,使用此策略的主要好处是空间/时间复杂度,即n log of n。
1.检查字符串的长度
2.建立频率词典并比较它们是否匹配,那么我们已经成功识别了字谜词
def char_frequency(word):
frequency = {}
for char in word:
#if character is in frequency then increment the value
if char in frequency:
frequency[char] += 1
#else add character and set it to 1
else:
frequency[char] = 1
return frequency
a_word ='google'
b_word ='ooggle'
#check length of the words
if (len(a_word) != len(b_word)):
print ("not anagram")
else:
#here we check the frequecy to see if we get the same
if ( char_frequency(a_word) == char_frequency(b_word)):
print("found anagram")
else:
print("no anagram")
def findanagranfromlistofwords(li):
dict = {}
index=0
for i in range(0,len(li)):
originalfirst = li[index]
sortedfirst = ''.join(sorted(str(li[index])))
for j in range(index+1,len(li)):
next = ''.join(sorted(str(li[j])))
print next
if sortedfirst == next:
dict.update({originalfirst:li[j]})
print "dict = ",dict
index+=1
print dict
findanagranfromlistofwords(["car", "tree", "boy", "girl", "arc"])
# list of words
words = ["ROOPA","TABU","OOPAR","BUTA","BUAT" , "PAROO","Soudipta",
"Kheyali Park", "Tollygaunge", "AROOP","Love","AOORP",
"Protijayi","Paikpara","dipSouta","Shyambazaar",
"jayiProti", "North Calcutta", "Sovabazaar"]
#Method 1
A = [''.join(sorted(word)) for word in words]
dict ={}
for indexofsamewords,samewords in enumerate(A):
dict.setdefault(samewords, []).append(indexofsamewords)
print(dict)
#{'AOOPR': [0, 2, 5, 9, 11], 'ABTU': [1, 3, 4], 'Sadioptu': [6, 14], ' KPaaehiklry': [7], 'Taeggllnouy': [8], 'Leov': [10], 'Paiijorty': [12, 16], 'Paaaikpr': [13], 'Saaaabhmryz': [15], ' CNaachlortttu': [17], 'Saaaaborvz': [18]}
for index in dict.values():
print( [words[i] for i in index ] )
输出 :
['ROOPA', 'OOPAR', 'PAROO', 'AROOP', 'AOORP']
['TABU', 'BUTA', 'BUAT']
['Soudipta', 'dipSouta']
['Kheyali Park']
['Tollygaunge']
['Love']
['Protijayi', 'jayiProti']
['Paikpara']
['Shyambazaar']
['North Calcutta']
['Sovabazaar']
我正在使用字典来一个一个地存储字符串的每个字符。然后遍历第二个字符串并在字典中查找字符,如果存在则减少字典中相应键的计数。
class Anagram:
dict = {}
def __init__(self):
Anagram.dict = {}
def is_anagram(self,s1, s2):
print '***** starting *****'
print '***** convert input strings to lowercase'
s1 = s1.lower()
s2 = s2.lower()
for i in s1:
if i not in Anagram.dict:
Anagram.dict[i] = 1
else:
Anagram.dict[i] += 1
print Anagram.dict
for i in s2:
if i not in Anagram.dict:
return false
else:
Anagram.dict[i] -= 1
print Anagram.dict
for i in Anagram.dict.keys():
if Anagram.dict.get(i) == 0:
del Anagram.dict[i]
if len(Anagram.dict) == 0:
print Anagram.dict
return True
else:
return False
import collections
def find_anagrams(x):
anagrams = [''.join(sorted(list(i))) for i in x]
anagrams_counts = [item for item, count in collections.Counter(anagrams).items() if count > 1]
return [i for i in x if ''.join(sorted(list(i))) in anagrams_counts]
python中的解决方案可以如下:
class Word:
def __init__(self, data, index):
self.data = data
self.index = index
def printAnagrams(arr):
dupArray = []
size = len(arr)
for i in range(size):
dupArray.append(Word(arr[i], i))
for i in range(size):
dupArray[i].data = ''.join(sorted(dupArray[i].data))
dupArray = sorted(dupArray, key=lambda x: x.data)
for i in range(size):
print arr[dupArray[i].index]
def main():
arr = ["dog", "act", "cat", "god", "tac"]
printAnagrams(arr)
if __name__== '__main__':
main()
上面的时间复杂度是 O(NMLogN + NMLogM) = O(NMlogN)
Python中的简单解决方案:
def anagram(s1,s2):
# Remove spaces and lowercase letters
s1 = s1.replace(' ','').lower()
s2 = s2.replace(' ','').lower()
# Return sorted match.
return sorted(s1) == sorted(s2)
这工作正常:
def find_ana(l):
a=[]
for i in range(len(l)):
for j in range(len(l)):
if (l[i]!=l[j]) and (sorted(l[i])==sorted(l[j])):
a.append(l[i])
a.append(l[j])
return list(set(a))
这个可以帮助你:
假设输入以逗号分隔的字符串形式给出
控制台输入:abc,bac,car,rac,pqr,acb,acr,abc
in_list = list()
in_list = map(str, raw_input("Enter strings seperated by comma").split(','))
list_anagram = list()
for i in range(0, len(in_list) - 1):
if sorted(in_list[i]) not in list_anagram:
for j in range(i + 1, len(in_list)):
isanagram = (sorted(in_list[i]) == sorted(in_list[j]))
if isanagram:
list_anagram.append(sorted(in_list[i]))
print in_list[i], 'isanagram'
break
集合是输出的适当数据结构,因为您可能不希望输出中有冗余。如果以前观察到特定的字母序列,以及它最初来自哪个单词,字典是查找的理想选择。利用我们可以在不扩展集合的情况下多次将同一个项目添加到集合中这一事实,我们可以摆脱一个 for 循环。
def return_anagrams(word_list):
d = {}
out = set()
for word in word_list:
s = ''.join(sorted(word))
try:
out.add(d[s])
out.add(word)
except:
d[s] = word
return out
一种更快的方法是利用加法的交换性:
import numpy as np
def vector_anagram(l):
d, out = dict(), set()
for word in l:
s = np.zeros(26, dtype=int)
for c in word:
s[ord(c)-97] += 1
s = tuple(s)
try:
out.add(d[s])
out.add(word)
except:
d[s] = word
return out
只需使用Python3 集合包中可用的Counter 方法。
str1="abc"
str2="cab"
Counter(str1)==Counter(str2)
# returns True i.e both Strings are anagrams of each other.
这是令人印象深刻的解决方案。
功能字母计数映射器:
对于文件/列表中的每个单词
1.创建一个初始计数为0的字母/字符字典。
2.保持单词中所有字母的计数并增加上述字母字典中的计数。
3.创建字母计数字典并返回字母字典值的元组。
函数字谜计数器:
1.创建一个以字母计数元组为键的字典,并计算针对它的出现次数。
2.遍历上面的dict,如果值> 1,则将该值添加到字谜计数中。
import sys
words_count_map_dict = {}
fobj = open(sys.argv[1],"r")
words = fobj.read().split('\n')[:-1]
def alphabet_count_mapper(word):
alpha_count_dict = dict(zip('abcdefghijklmnopqrstuvwxyz',[0]*26))
for alpha in word:
if alpha in alpha_count_dict.keys():
alpha_count_dict[alpha] += 1
else:
alpha_count_dict.update(dict(alpha=0))
return tuple(alpha_count_dict.values())
def anagram_counter(words):
anagram_count = 0
for word in words:
temp_mapper = alphabet_count_mapper(word)
if temp_mapper in words_count_map_dict.keys():
words_count_map_dict[temp_mapper] += 1
else:
words_count_map_dict.update({temp_mapper:1})
for val in words_count_map_dict.values():
if val > 1:
anagram_count += val
return anagram_count
print anagram_counter(words)
使用文件路径作为命令行参数运行它
您将单词中的每个字符转换为数字(通过ord()函数),将它们相加为单词。如果两个词的总和相同,那么它们就是字谜。然后过滤列表中出现两次以上的总和。
def sumLet(w):
return sum([ord(c) for c in w])
def find_anagrams(l):
num_l = map(sumLet,l)
return [l[i] for i,num in enumerate(num_l) if num_l.count(num) > 1]
>>> words = ["car", "race", "rac", "ecar", "me", "em"]
>>> anagrams = {}
... for word in words:
... reverse_word=word[::-1]
... if reverse_word in words:
... anagrams[word] = (words.pop(words.index(reverse_word)))
>>> anagrams
20: {'car': 'rac', 'me': 'em', 'race': 'ecar'}
逻辑:
如果你想用java解决方案,
public List<String> findAnagrams(List<String> dictionary) {
// TODO do null check and other basic validations.
Map<String, List<String>> wordMap = new HashMap<String, List<String>>();
for(String word : dictionary) {
// ignore if word is null
char[] tempWord = word.tocharArray();
Arrays.sort(tempWord);
String newWord = new String(tempWord);
if(wordMap.containsKey(newWord)) {
wordMap.put(newWord, wordMap.get(word).add(word));
} else {
wordMap.put(newWord, new ArrayList<>() {word});
}
}
List<String> anagrams = new ArrayList<>();
for(String key : wordMap.keySet()) {
if(wordMap.get(key).size() > 1) {
anagrams.addAll(wordMap.get(key));
}
}
return anagrams;
}