How to resolve the AnalysisException: resolved attribute(s) in Spark

故里飘歌 2020-12-14 07:03
    val rdd = sc.parallelize(Seq(("vskp", Array(2.0, 1.0, 2.1, 5.4)), ("hyd", Array(1.5, 0.5, 0.9, 3.7)), ("hyd", Array(1.5, 0.5, 0.9, 3.2)), ("tvm", Array(8.0, 2.9,

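A minimal sketch of how this exception typically shows up, assuming the RDD above is turned into a DataFrame and a `Column` captured from it is then used against a derived DataFrame whose plan no longer carries that attribute (the column names and the aggregation here are illustrative, not taken from the original question):

    // Illustrative only: build a DataFrame from the RDD above, then aggregate it
    import spark.implicits._
    val df     = rdd.toDF("city", "values")
    val counts = df.groupBy("city").count()

    // Using a Column that belongs to `df` against `counts` fails, because the
    // aggregated plan no longer exposes df's "values" attribute:
    //   counts.select(df("values"))
    //   => org.apache.spark.sql.AnalysisException:
    //      Resolved attribute(s) values#... missing from city#..., count#...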
12 Answers
  •  醉酒成梦
    2020-12-14 07:31

    Thanks to Tomer's answer.

    For Scala: the issue came up when I tried to use the column in the self-join clause. To fix it, use the methods below, which rename the join columns on the right-hand side before building the join expression:

    // Imports assumed by the helpers below
    import org.apache.spark.sql.{Column, DataFrame}
    import org.apache.spark.sql.functions.lit

    // `and` together all the column conditions
    def andAll(cols: Iterable[Column]): Column =
      if (cols.isEmpty) lit(true)
      else cols.tail.foldLeft(cols.head) { case (soFar, curr) => soFar.and(curr) }

    // Join on the given columns after renaming them on the right-hand side,
    // so the join condition never references the same attribute twice
    def renameColAndJoin(leftDf: DataFrame, joinCols: Seq[String], joinType: String = "inner")(rightDf: DataFrame): DataFrame = {

      val renamedCols: Seq[String]          = joinCols.map(colName => s"${colName}_renamed")
      val zippedCols: Seq[(String, String)] = joinCols.zip(renamedCols)

      // Rename each join column on the right-hand DataFrame
      val renamedRightDf: DataFrame = zippedCols.foldLeft(rightDf) {
        case (df, (origColName, renamedColName)) => df.withColumnRenamed(origColName, renamedColName)
      }

      // Compare the left-hand original columns against the renamed right-hand ones
      val joinExpr: Column = andAll(zippedCols.map {
        case (origCol, renamedCol) => leftDf(origCol).equalTo(renamedRightDf(renamedCol))
      })

      leftDf.join(renamedRightDf, joinExpr, joinType)
    }
    
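    As a usage sketch (assuming `spark.implicits._` is in scope; the DataFrames and column names below are made up for illustration), joining two frames on a shared key looks like this:

        // Hypothetical DataFrames just for illustration
        val left  = Seq(("vskp", 10), ("hyd", 20)).toDF("city", "leftVal")
        val right = Seq(("vskp", 1.5), ("hyd", 0.9)).toDF("city", "rightVal")

        // Joins on "city"; the right-hand key comes back as "city_renamed",
        // so the result has no ambiguous attributes
        val joined = renameColAndJoin(left, Seq("city"))(right)
        joined.show()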
