问题
a = [1,2,3,4,5]
b = [1,3,5,6]
c = a and b
print c
actual output: [1,3,5,6]
expected output: [1,3,5]
How can we achieve a boolean AND operation (list intersection) on two lists?
回答1:
If order is not important and you don't need to worry about duplicates then you can use set intersection:
>>> a = [1,2,3,4,5]
>>> b = [1,3,5,6]
>>> list(set(a) & set(b))
[1, 3, 5]
回答2:
If you convert the larger of the two lists into a set, you can get the intersection of that set with any iterable using intersection()
:
a = [1,2,3,4,5]
b = [1,3,5,6]
set(a).intersection(b)
回答3:
Using list comprehensions is a pretty obvious one for me. Not sure about performance, but at least things stay lists.
[x for x in a if x in b]
Or "all the x values that are in A, if the X value is in B".
回答4:
Make a set out of the larger one:
_auxset = set(a)
Then,
c = [x for x in b if x in _auxset]
will do what you want (preserving b
's ordering, not a
's -- can't necessarily preserve both) and do it fast. (Using if x in a
as the condition in the list comprehension would also work, and avoid the need to build _auxset
, but unfortunately for lists of substantial length it would be a lot slower).
If you want the result to be sorted, rather than preserve either list's ordering, an even neater way might be:
c = sorted(set(a).intersection(b))
回答5:
Here's some Python 2 / Python 3 code that generates timing information for both list-based and set-based methods of finding the intersection of two lists.
The pure list comprehension algorithms are O(n^2), since in
on a list is a linear search. The set-based algorithms are O(n), since set search is O(1), and set creation is O(n) (and converting a set to a list is also O(n)). So for sufficiently large n the set-based algorithms are faster, but for small n the overheads of creating the set(s) make them slower than the pure list comp algorithms.
#!/usr/bin/env python
''' Time list- vs set-based list intersection
See http://stackoverflow.com/q/3697432/4014959
Written by PM 2Ring 2015.10.16
'''
from __future__ import print_function, division
from timeit import Timer
setup = 'from __main__ import a, b'
cmd_lista = '[u for u in a if u in b]'
cmd_listb = '[u for u in b if u in a]'
cmd_lcsa = 'sa=set(a);[u for u in b if u in sa]'
cmd_seta = 'list(set(a).intersection(b))'
cmd_setb = 'list(set(b).intersection(a))'
reps = 3
loops = 50000
def do_timing(heading, cmd, setup):
t = Timer(cmd, setup)
r = t.repeat(reps, loops)
r.sort()
print(heading, r)
return r[0]
m = 10
nums = list(range(6 * m))
for n in range(1, m + 1):
a = nums[:6*n:2]
b = nums[:6*n:3]
print('\nn =', n, len(a), len(b))
#print('\nn = %d\n%s %d\n%s %d' % (n, a, len(a), b, len(b)))
la = do_timing('lista', cmd_lista, setup)
lb = do_timing('listb', cmd_listb, setup)
lc = do_timing('lcsa ', cmd_lcsa, setup)
sa = do_timing('seta ', cmd_seta, setup)
sb = do_timing('setb ', cmd_setb, setup)
print(la/sa, lb/sa, lc/sa, la/sb, lb/sb, lc/sb)
output
n = 1 3 2
lista [0.082171916961669922, 0.082588911056518555, 0.0898590087890625]
listb [0.069530963897705078, 0.070394992828369141, 0.075379848480224609]
lcsa [0.11858987808227539, 0.1188349723815918, 0.12825107574462891]
seta [0.26900982856750488, 0.26902294158935547, 0.27298116683959961]
setb [0.27218389511108398, 0.27459001541137695, 0.34307217597961426]
0.305460649521 0.258469975867 0.440838458259 0.301898526833 0.255455833892 0.435697630214
n = 2 6 4
lista [0.15915989875793457, 0.16000485420227051, 0.16551494598388672]
listb [0.13000702857971191, 0.13060092926025391, 0.13543915748596191]
lcsa [0.18650484085083008, 0.18742108345031738, 0.19513416290283203]
seta [0.33592700958251953, 0.34001994132995605, 0.34146714210510254]
setb [0.29436492919921875, 0.2953648567199707, 0.30039691925048828]
0.473793098554 0.387009751735 0.555194537893 0.540689066428 0.441652573672 0.633583767462
n = 3 9 6
lista [0.27657914161682129, 0.28098297119140625, 0.28311991691589355]
listb [0.21585917472839355, 0.21679902076721191, 0.22272896766662598]
lcsa [0.22559309005737305, 0.2271728515625, 0.2323150634765625]
seta [0.36382699012756348, 0.36453008651733398, 0.36750602722167969]
setb [0.34979605674743652, 0.35533690452575684, 0.36164689064025879]
0.760194128313 0.59330170819 0.62005595016 0.790686848184 0.61710008036 0.644927481902
n = 4 12 8
lista [0.39616990089416504, 0.39746403694152832, 0.41129183769226074]
listb [0.33485794067382812, 0.33914685249328613, 0.37850618362426758]
lcsa [0.27405810356140137, 0.2745978832244873, 0.28249192237854004]
seta [0.39211201667785645, 0.39234519004821777, 0.39317893981933594]
setb [0.36988520622253418, 0.37011313438415527, 0.37571001052856445]
1.01034878821 0.85398540833 0.698928091731 1.07106176249 0.905302334456 0.740927452493
n = 5 15 10
lista [0.56792402267456055, 0.57422614097595215, 0.57740211486816406]
listb [0.47309303283691406, 0.47619009017944336, 0.47628307342529297]
lcsa [0.32805585861206055, 0.32813096046447754, 0.3349759578704834]
seta [0.40036201477050781, 0.40322518348693848, 0.40548801422119141]
setb [0.39103078842163086, 0.39722800254821777, 0.43811702728271484]
1.41852623806 1.18166313332 0.819398061028 1.45237674242 1.20986133789 0.838951479847
n = 6 18 12
lista [0.77897095680236816, 0.78187918663024902, 0.78467702865600586]
listb [0.629547119140625, 0.63210701942443848, 0.63321495056152344]
lcsa [0.36563992500305176, 0.36638498306274414, 0.38175487518310547]
seta [0.46695613861083984, 0.46992206573486328, 0.47583580017089844]
setb [0.47616910934448242, 0.47661614418029785, 0.4850609302520752]
1.66818870637 1.34819326075 0.783028414812 1.63591241329 1.32210827369 0.767878297495
n = 7 21 14
lista [0.9703209400177002, 0.9734041690826416, 1.0182771682739258]
listb [0.82394003868103027, 0.82625699043273926, 0.82796716690063477]
lcsa [0.40975093841552734, 0.41210508346557617, 0.42286920547485352]
seta [0.5086359977722168, 0.50968098640441895, 0.51014018058776855]
setb [0.48688101768493652, 0.4879908561706543, 0.49204087257385254]
1.90769222837 1.61990115188 0.805587768483 1.99293236904 1.69228211566 0.841583309951
n = 8 24 16
lista [1.204819917678833, 1.2206029891967773, 1.258256196975708]
listb [1.014998197555542, 1.0206191539764404, 1.0343101024627686]
lcsa [0.50966787338256836, 0.51018595695495605, 0.51319599151611328]
seta [0.50310111045837402, 0.50556015968322754, 0.51335406303405762]
setb [0.51472997665405273, 0.51948785781860352, 0.52113485336303711]
2.39478683834 2.01748351664 1.01305257092 2.34068341135 1.97190418975 0.990165516871
n = 9 27 18
lista [1.511646032333374, 1.5133969783782959, 1.5639569759368896]
listb [1.2461750507354736, 1.254518985748291, 1.2613379955291748]
lcsa [0.5565330982208252, 0.56119203567504883, 0.56451296806335449]
seta [0.5966339111328125, 0.60275578498840332, 0.64791703224182129]
setb [0.54694414138793945, 0.5508568286895752, 0.55375313758850098]
2.53362406013 2.08867620074 0.932788243907 2.76380331728 2.27843203069 1.01753187594
n = 10 30 20
lista [1.7777848243713379, 2.1453688144683838, 2.4085969924926758]
listb [1.5070111751556396, 1.5202279090881348, 1.5779800415039062]
lcsa [0.5954139232635498, 0.59703707695007324, 0.60746097564697266]
seta [0.61563014984130859, 0.62125110626220703, 0.62354087829589844]
setb [0.56723213195800781, 0.57257509231567383, 0.57460403442382812]
2.88774814689 2.44791645689 0.967161734066 3.13413984189 2.6567803378 1.04968299523
Generated using a 2GHz single core machine with 2GB of RAM running Python 2.6.6 on a Debian flavour of Linux (with Firefox running in the background).
These figures are only a rough guide, since the actual speeds of the various algorithms are affected differently by the proportion of elements that are in both source lists.
回答6:
a = [1,2,3,4,5]
b = [1,3,5,6]
c = list(set(a).intersection(set(b)))
Should work like a dream. And, if you can, use sets instead of lists to avoid all this type changing!
回答7:
A functional way can be achieved using filter
and lambda
operator.
list1 = [1,2,3,4,5,6]
list2 = [2,4,6,9,10]
>>> list(filter(lambda x:x in list1, list2))
[2, 4, 6]
Edit: It filters out x that exists in both list1 and list, set difference can also be achieved using:
>>> list(filter(lambda x:x not in list1, list2))
[9,10]
Edit2: python3 filter
returns a filter object, encapsulating it with list
returns the output list.
回答8:
If, by Boolean AND, you mean items that appear in both lists, e.g. intersection, then you should look at Python's set and frozenset types.
回答9:
This is an example when you need Each element in the result should appear as many times as it shows in both arrays.
def intersection(nums1, nums2):
#example:
#nums1 = [1,2,2,1]
#nums2 = [2,2]
#output = [2,2]
#find first 2 and remove from target, continue iterating
target, iterate = [nums1, nums2] if len(nums2) >= len(nums1) else [nums2, nums1] #iterate will look into target
if len(target) == 0:
return []
i = 0
store = []
while i < len(iterate):
element = iterate[i]
if element in target:
store.append(element)
target.remove(element)
i += 1
return store
回答10:
It might be late but I just thought I should share for the case where you are required to do it manually (show working - haha) OR when you need all elements to appear as many times as possible or when you also need it to be unique.
Kindly note that tests have also been written for it.
from nose.tools import assert_equal
'''
Given two lists, print out the list of overlapping elements
'''
def overlap(l_a, l_b):
'''
compare the two lists l_a and l_b and return the overlapping
elements (intersecting) between the two
'''
#edge case is when they are the same lists
if l_a == l_b:
return [] #no overlapping elements
output = []
if len(l_a) == len(l_b):
for i in range(l_a): #same length so either one applies
if l_a[i] in l_b:
output.append(l_a[i])
#found all by now
#return output #if repetition does not matter
return list(set(output))
else:
#find the smallest and largest lists and go with that
sm = l_a if len(l_a) len(l_b) else l_b
for i in range(len(sm)):
if sm[i] in lg:
output.append(sm[i])
#return output #if repetition does not matter
return list(set(output))
## Test the Above Implementation
a = [1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]
b = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
exp = [1, 2, 3, 5, 8, 13]
c = [4, 4, 5, 6]
d = [5, 7, 4, 8 ,6 ] #assuming it is not ordered
exp2 = [4, 5, 6]
class TestOverlap(object):
def test(self, sol):
t = sol(a, b)
assert_equal(t, exp)
print('Comparing the two lists produces')
print(t)
t = sol(c, d)
assert_equal(t, exp2)
print('Comparing the two lists produces')
print(t)
print('All Tests Passed!!')
t = TestOverlap()
t.test(overlap)
来源:https://stackoverflow.com/questions/3697432/how-to-find-list-intersection