java.io.NotSerializableException in Spark Streaming with enabled checkpointing

后端 未结 1 640
萌比男神i
萌比男神i 2021-01-15 12:17

code below:

def main(args: Array[String]) {
    val sc = new SparkContext
    val sec = Seconds(3)
    val ssc = new StreamingContext(sc, sec)
    ssc.checkp         


        
1条回答
  •  情歌与酒
    2021-01-15 13:04

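    You can move the context initialization and configuration out of `main` and into the enclosing object. With checkpointing enabled, the function passed to `transform` is serialized together with the DStream graph; when the contexts are object members, that function reaches them through the singleton instead of capturing a non-serializable local `StreamingContext`: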

    /**
     * Minimal Spark Streaming job: every batch of a constant input stream is
     * unioned with a small fixed RDD and printed, with checkpointing enabled.
     *
     * The contexts are members of the object rather than locals of `main`, so the
     * function passed to `transform` reaches them through the `App` singleton
     * instead of capturing them. NOTE(review): this is presumably what avoids the
     * `NotSerializableException` when the DStream graph is checkpointed — confirm
     * against the Spark version in use.
     */
    object App {
      val sc = new SparkContext(new SparkConf().setAppName("foo").setMaster("local"))
      val sec = Seconds(3)
      val ssc = new StreamingContext(sc, sec)
      ssc.checkpoint("./checkpoint") // enable checkpoint

      /**
       * Builds the stream pipeline, starts the streaming context and blocks
       * until it terminates.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        val seedRdd = ssc.sparkContext.parallelize(Seq("a", "b", "c"))
        val inputDStream = new ConstantInputDStream(ssc, seedRdd)

        inputDStream.transform { batch =>
          // Created inside the function on every batch, so no local from `main`
          // has to be captured by the closure.
          val extra = ssc.sparkContext.parallelize(Seq("1", "2", "3"))
          batch.union(extra) // append the extra RDD to the current batch
        }.print()

        ssc.start()
        ssc.awaitTermination()
      }
    }
    

    0 讨论(0)
提交回复
热议问题