Use “IS IN” between 2 Spark dataframe columns
Question: I have the following DataFrame:

```python
from pyspark.sql.types import *

rdd = sc.parallelize([
    ('ALT', ['chien', 'chat'],   'oiseau'),
    ('ALT', ['oiseau'],          'oiseau'),
    ('TDR', ['poule', 'poulet'], 'poule'),
    ('ALT', ['ours'],            'chien'),
    ('ALT', ['paon'],            'tigre'),
    ('TDR', ['tigre', 'lion'],   'lion'),
    ('ALT', ['chat'],            'chien'),
])

schema = StructType([
    StructField("ClientId", StringType(), True),
    StructField("Animaux", ArrayType(StringType(), True), True),
    StructField("Animal", StringType(), True),
])

# Build the DataFrame from the RDD using the schema above
test = rdd.toDF(schema)
```
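The question amounts to checking, row by row, whether the value in the `Animal` column is contained in the `Animaux` array column. A minimal sketch of one way to do this, assuming the `test` DataFrame above and an active SparkSession; the output column name `AnimalPresent` is only illustrative:

```python
from pyspark.sql import functions as F

# array_contains(array, value) via a SQL expression lets the second argument
# be another column rather than a literal, which keeps this compatible with
# older PySpark versions of the Python array_contains() helper.
result = test.withColumn("AnimalPresent", F.expr("array_contains(Animaux, Animal)"))
result.show(truncate=False)
```

The same check could also be written with a join or a UDF, but the built-in `array_contains` expression avoids the serialization overhead of a Python UDF.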