transform
一种转换算子
应用在DStream上,可以用于执行任意的RDD到RDD的转换操作。它可以用于实现DStream API中所没有提供的操作。
package com.shsxt.spark.scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Created by BF-Lone Silver Wind on 2020-01-02
 *
 * Streaming word count that demonstrates `DStream.transform`: every batch RDD
 * is joined against a static blacklist RDD so that blacklisted tokens are
 * removed before the words are counted.
 */
object transform {

  /**
   * Builds and runs the streaming job.
   *
   * Reads text lines from a socket in 5-second batches, splits each line on
   * tab characters, discards the blacklisted tokens, prints the per-batch
   * word counts, and then blocks until the streaming context terminates.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("Tranform")
    val ssc = new StreamingContext(conf, Seconds(5))

    // Each received line becomes a sequence of (word, 1) pairs.
    val lines = ssc.socketTextStream("192.168.241.211", 9999)
    val wordPairs = lines
      .flatMap(line => line.split("\t"))
      .map(word => (word, 1))

    // Assume this is the blacklist. It is keyed by token so that it can be
    // joined against the (word, 1) pairs of each batch.
    val blacklist = ssc.sparkContext
      .parallelize(List(",", "?", "!", "."))
      .map(token => (token, true))

    // transform exposes the underlying RDD of every batch, which makes
    // RDD-to-RDD operations such as this join available on the stream.
    val allowedPairs = wordPairs.transform { rdd =>
      rdd
        .leftOuterJoin(blacklist)
        // After the left outer join, the right-hand side is None exactly
        // when the word has no entry in the blacklist.
        .filter { case (_, (_, blacklisted)) => blacklisted.isEmpty }
        .map { case (word, _) => (word, 1) }
    }

    val wordCounts = allowedPairs.reduceByKey(_ + _)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
启动程序后,在 192.168.241.211 的 9999 端口上提供文本输入(例如执行 nc -lk 9999),输入以制表符(Tab)分隔的单词即可测试。
来源:CSDN
作者:BF-LoneSilverWind
链接:https://blog.csdn.net/digua930126/article/details/103811447