x = [8,2,3,4,5]
y = [6,3,7,2,1]
如何以简洁优雅的方式找出两个列表(在本例中为“2”)中的第一个公共元素?任何列表都可以是空的,也可以没有公共元素——在这种情况下 None 就可以了。
我需要这个来向不熟悉它的人展示 python,所以越简单越好。
UPD:顺序对我的目的并不重要,但假设我正在寻找 x 中也出现在 y 中的第一个元素。
x = [8,2,3,4,5]
y = [6,3,7,2,1]
如何以简洁优雅的方式找出两个列表(在本例中为“2”)中的第一个公共元素?任何列表都可以是空的,也可以没有公共元素——在这种情况下 None 就可以了。
我需要这个来向不熟悉它的人展示 python,所以越简单越好。
UPD:顺序对我的目的并不重要,但假设我正在寻找 x 中也出现在 y 中的第一个元素。
这应该是直截了当的几乎和它一样有效(更有效的解决方案检查Ashwini Chaudharys 的答案和最有效的检查jamylaks 的答案和评论):
result = None
# Go trough one array
for i in x:
# The element repeats in the other list...
if i in y:
# Store the result and break the loop
result = i
break
或者更优雅的事件是使用 PEP 8 封装相同的功能来运行,如编码风格约定:
def get_first_common_element(x,y):
''' Fetches first element from x that is common for both lists
or return None if no such an element is found.
'''
for i in x:
if i in y:
return i
# In case no common element found, you could trigger Exception
# Or if no common element is _valid_ and common state of your application
# you could simply return None and test return value
# raise Exception('No common element found')
return None
如果你想要所有常见的元素,你可以这样做:
>>> [i for i in x if i in y]
[1, 2, 3]
排序不是最快的方法,它可以在 O(N) 时间内用一个集合(散列图)完成。
>>> x = [8,2,3,4,5]
>>> y = [6,3,7,2,1]
>>> set_y = set(y)
>>> next((a for a in x if a in set_y), None)
2
或者:
next(ifilter(set(y).__contains__, x), None)
这就是它的作用:
>>> def foo(x, y):
seen = set(y)
for item in x:
if item in seen:
return item
else:
return None
>>> foo(x, y)
2
为了显示不同方法(朴素方法、二分搜索和集合)之间的时间差异,这里有一些时间。我不得不这样做来反驳那些认为二分搜索更快的人的惊人数量......:
from itertools import ifilter
from bisect import bisect_left
a = [1, 2, 3, 9, 1, 1] * 100000
b = [44, 11, 23, 9, 10, 99] * 10000
c = [1, 7, 2, 4, 1, 9, 9, 2] * 1000000 # repeats early
d = [7, 6, 11, 13, 19, 10, 19] * 1000000
e = range(50000)
f = range(40000, 90000) # repeats in the middle
g = [1] * 10000000 # no repeats at all
h = [2] * 10000000
from random import randrange
i = [randrange(10000000) for _ in xrange(5000000)] # some randoms
j = [randrange(10000000) for _ in xrange(5000000)]
def common_set(x, y, ifilter=ifilter, set=set, next=next):
return next(ifilter(set(y).__contains__, x), None)
pass
def common_b_sort(x, y, bisect=bisect_left, sorted=sorted, min=min, len=len):
sorted_y = sorted(y)
for a in x:
if a == sorted_y[min(bisect_left(sorted_y, a),len(sorted_y)-1)]:
return a
else:
return None
def common_naive(x, y):
for a in x:
for b in y:
if a == b: return a
else:
return None
from timeit import timeit
from itertools import repeat
import threading, thread
print 'running tests - time limit of 20 seconds'
for x, y in [('a', 'b'), ('c', 'd'), ('e', 'f'), ('g', 'h'), ('i', 'j')]:
for func in ('common_set', 'common_b_sort', 'common_naive'):
try:
timer = threading.Timer(20, thread.interrupt_main) # 20 second time limit
timer.start()
res = timeit(stmt="print '[', {0}({1}, {2}), ".format(func, x, y),
setup='from __main__ import common_set, common_b_sort, common_naive, {0}, {1}'.format(x, y),
number=1)
except:
res = "Too long!!"
finally:
print '] Function: {0}, {1}, {2}. Time: {3}'.format(func, x, y, res)
timer.cancel()
测试数据为:
a = [1, 2, 3, 9, 1, 1] * 100000
b = [44, 11, 23, 9, 10, 99] * 10000
c = [1, 7, 2, 4, 1, 9, 9, 2] * 1000000 # repeats early
d = [7, 6, 11, 13, 19, 10, 19] * 1000000
e = range(50000)
f = range(40000, 90000) # repeats in the middle
g = [1] * 10000000 # no repeats at all
h = [2] * 10000000
from random import randrange
i = [randrange(10000000) for _ in xrange(5000000)] # some randoms
j = [randrange(10000000) for _ in xrange(5000000)]
结果:
running tests - time limit of 20 seconds
[ 9 ] Function: common_set, a, b. Time: 0.00569520707241
[ 9 ] Function: common_b_sort, a, b. Time: 0.0182240340602
[ 9 ] Function: common_naive, a, b. Time: 0.00978832505249
[ 7 ] Function: common_set, c, d. Time: 0.249175872911
[ 7 ] Function: common_b_sort, c, d. Time: 1.86735751332
[ 7 ] Function: common_naive, c, d. Time: 0.264309220865
[ 40000 ] Function: common_set, e, f. Time: 0.00966861710078
[ 40000 ] Function: common_b_sort, e, f. Time: 0.0505980508696
[ ] Function: common_naive, e, f. Time: Too long!!
[ None ] Function: common_set, g, h. Time: 1.11300018578
[ None ] Function: common_b_sort, g, h. Time: 14.9472068377
[ ] Function: common_naive, g, h. Time: Too long!!
[ 5411743 ] Function: common_set, i, j. Time: 1.88894859542
[ 5411743 ] Function: common_b_sort, i, j. Time: 6.28617268396
[ 5411743 ] Function: common_naive, i, j. Time: 1.11231867458
这让您了解它将如何扩展更大的输入,O(N) vs O(N log N) vs O(N^2)
一个衬垫,next
用于从生成器中取出第一个项目:
x = [8,2,3,4,5]
y = [6,3,7,2,1]
first = next((a for a in x if a in y), None)
或者更有效,因为set.__contains__
它比 快list.__contains__
:
set_y = set(y)
first = next((a for a in x if a in set_y), None)
或者更有效但仍然在一行中(不要这样做):
first = next((lambda set_y: a for a in x if a in set_y)(set(y)), None)
使用带有的for
循环in
会导致O(N^2)
复杂度,但您可以y
在此处排序并使用二进制搜索将时间复杂度提高到O(NlogN)
.
def binary_search(lis,num):
low=0
high=len(lis)-1
ret=-1 #return -1 if item is not found
while low<=high:
mid=(low+high)//2
if num<lis[mid]:
high=mid-1
elif num>lis[mid]:
low=mid+1
else:
ret=mid
break
return ret
x = [8,2,3,4,5]
y = [6,3,7,2,1]
y.sort()
for z in x:
ind=binary_search(y,z)
if ind!=-1
print z
break
输出:
2
使用该bisect
模块执行与上述相同的操作:
import bisect
x = [8,2,3,4,5]
y = [6,3,7,2,1]
y.sort()
for z in x:
ind=bisect.bisect(y,z)-1 #or use `ind=min(bisect.bisect_left(y, z), len(y) - 1)`
if ind!=-1 and y[ind] ==z:
print z #prints 2
break
我假设你想教这个人 Python,而不仅仅是编程。因此,我会毫不犹豫地使用zip
丑陋的循环变量来代替;它是 Python 中非常有用的部分,不难解释。
def first_common(x, y):
common = set(x) & set(y)
for current_x, current_y in zip(x, y):
if current_x in common:
return current_x
elif current_y in common:
return current_y
print first_common([8,2,3,4,5], [6,3,7,2,1])
如果您真的不想使用zip
,以下是如何在没有的情况下使用:
def first_common2(x, y):
common = set(x) & set(y)
for i in xrange(min(len(x), len(y))):
if x[i] in common:
return x[i]
elif y[i] in common:
return y[i]
对于那些感兴趣的人,这就是它如何扩展到任意数量的序列:
def first_common3(*seqs):
common = set.intersection(*[set(seq) for seq in seqs])
for current_elements in zip(*seqs):
for element in current_elements:
if element in common:
return element
最后,请注意,与其他一些解决方案相比,如果第一个公共元素首先出现在第二个列表中,这也可以正常工作。
我刚刚注意到您的更新,这提供了一个更简单的解决方案:
def first_common4(x, y):
ys = set(y) # We don't want this to be recreated for each element in x
for element in x:
if element in ys:
return element
以上可以说比生成器表达式更具可读性。
太糟糕了,没有内置的有序集。这将是一个更优雅的解决方案。
This one uses sets. It returns the first common element or None if no common element.
def findcommon(x,y):
common = None
for i in range(0,max(len(x),len(y))):
common = set(x[0:i]).intersection(set(y[0:i]))
if common: break
return list(common)[0] if common else None
def first_common_element(x,y):
common = set(x).intersection(set(y))
if common:
return x[min([x.index(i)for i in common])]
使用 for 循环似乎最容易向新人解释。
for number1 in x:
for number2 in y:
if number1 == number2:
print number1, number2
print x.index(number1), y.index(number2)
exit(0)
print "No common numbers found."
NB 未测试,只是在我脑海中。
只是为了好玩(可能效率不高),另一个版本使用itertools
:
from itertools import dropwhile, product
from operator import __ne__
def accept_pair(f):
"Make a version of f that takes a pair instead of 2 arguments."
def accepting_pair(pair):
return f(*pair)
return accepting_pair
def get_first_common(x, y):
try:
# I think this *_ unpacking syntax works only in Python 3
((first_common, _), *_) = dropwhile(
accept_pair(__ne__),
product(x, y))
except ValueError:
return None
return first_common
x = [8, 2, 3, 4, 5]
y = [6, 3, 7, 2, 1]
print(get_first_common(x, y)) # 2
y = [6, 7, 1]
print(get_first_common(x, y)) # None
lambda pair: pair[0] != pair[1]
使用它代替.更简单,但没有那么有趣accept_pair(__ne__)
。
使用 set - 这是任意数量列表的通用解决方案:
def first_common(*lsts):
common = reduce(lambda c, l: c & set(l), lsts[1:], set(lsts[0]))
if not common:
return None
firsts = [min(lst.index(el) for el in common) for lst in lsts]
index_in_list = min(firsts)
trgt_lst_index = firsts.index(index_in_list)
return lsts[trgt_lst_index][index_in_list]
事后的想法 - 不是一个有效的解决方案,这个减少了多余的开销
def first_common(*lsts):
common = reduce(lambda c, l: c & set(l), lsts[1:], set(lsts[0]))
if not common:
return None
for lsts_slice in itertools.izip_longest(*lsts):
slice_intersection = common.intersection(lsts_slice)
if slice_intersection:
return slice_intersection.pop()