可以将文章内容翻译成中文，广告屏蔽插件可能会导致该功能失效（如失效，请关闭广告屏蔽插件后再试）：

问题:

I have a Pandas dataframe where the values are lists:

import pandas as pd  DF = pd.DataFrame({'X':[[1, 5], [1, 2]], 'Y':[[1, 2, 5], [1, 3, 5]]}) DF          X          Y 0   [1, 5]  [1, 2, 5] 1   [1, 2]  [1, 3, 5]

I want to check if the lists in X are subsets of the lists in Y. With individual lists, we can do this using set(x).issubset(set(y)). But how would we do this across Pandas data columns?

So far, the only thing I've come up with is to use the individual lists as a workaround, then convert the result back to Pandas. Seems a bit complicated for this task:

foo = [set(DF['X'][i]).issubset(set(DF['Y'][i])) for i in range(len(DF['X']))]  foo = pd.DataFrame(foo) foo.columns = ['x_sub_y'] pd.merge(DF, foo, how = 'inner', left_index = True, right_index = True)           X          Y   x_sub_y 0   [1, 5]  [1, 2, 5]   True 1   [1, 2]  [1, 3, 5]   False

Is there an easier way to achieve this? Possibly using .map or .apply?

回答1:

Use set and issubset:

DF.assign(x_sub_y = DF.apply(lambda x: set(x.X).issubset(set(x.Y)), axis=1))

Output:

        X          Y  x_sub_y 0  [1, 5]  [1, 2, 5]     True 1  [1, 2]  [1, 3, 5]    False

回答2:

Option 1
set conversion and difference using np.where

df_temp = DF.applymap(set) DF['x_sub_y'] = np.where(df_temp.X - df_temp.Y, False, True) DF         X          Y  x_sub_y 0  [1, 5]  [1, 2, 5]     True 1  [1, 2]  [1, 3, 5]    False

Option 2
Faster, astype conversion

DF['x_sub_y'] = ~(DF.X.apply(set) - DF.Y.apply(set)).astype(bool) DF          X          Y  x_sub_y 0  [1, 5]  [1, 2, 5]     True 1  [1, 2]  [1, 3, 5]    False

Option 3
Fun with np.vectorize

def foo(x):      return not x  v = np.vectorize(foo)     DF['x_sub_y'] = v(DF.X.apply(set) - DF.Y.apply(set))  DF         X          Y  x_sub_y 0  [1, 5]  [1, 2, 5]     True 1  [1, 2]  [1, 3, 5]    False

Extending Scott Boston's answer for speed using the same approach:

def foo(x, y):     return set(x).issubset(y)  v = np.vectorize(foo)  DF['x_sub_y'] = v(DF.X, DF.Y) DF         X          Y  x_sub_y 0  [1, 5]  [1, 2, 5]     True 1  [1, 2]  [1, 3, 5]    False

Small

Large (df * 10000)

1 loop, best of 3: 1.26 s per loop               # Before
100 loops, best of 3: 13.3 ms per loop           # After

回答3:

Or you can try set

DF['x_sub_y']=DF.X+DF.Y DF['x_sub_y']=DF['x_sub_y'].apply(lambda x : list(set(x)))==DF.Y DF Out[691]:          X          Y  x_sub_y 0  [1, 5]  [1, 2, 5]     True 1  [1, 2]  [1, 3, 5]    False

文章来源: Mapping methods across multiple columns in a Pandas DataFrame

标签

sub

Mapping methods across multiple columns in a Pandas DataFrame

问题:

回答1:

回答2:

Small

Large (df * 10000)

回答3:

Large (df * 10000)