In SparkSQL,I use DF.wirte.mode(SaveMode.Append).json(xxxx),but this method get these files like
the filename is too complex and random,I can\'t use
You can try this method which I find from somewhere. Process Spark Streaming rdd and store to single HDFS file
import org.apache.hadoop.fs.{ FileSystem, FileUtil, Path }
def saveAsTextFileAndMerge[T](hdfsServer: String, fileName: String, rdd: RDD[T]) = {
val sourceFile = hdfsServer + "/tmp/"
rdd.saveAsTextFile(sourceFile)
val dstPath = hdfsServer + "/final/"
merge(sourceFile, dstPath, fileName)
}
def merge(srcPath: String, dstPath: String, fileName: String): Unit = {
val hadoopConfig = new Configuration()
val hdfs = FileSystem.get(hadoopConfig)
val destinationPath = new Path(dstPath)
if (!hdfs.exists(destinationPath)) {
hdfs.mkdirs(destinationPath)
}
FileUtil.copyMerge(hdfs, new Path(srcPath), hdfs, new Path(dstPath + "/" + fileName), false, hadoopConfig, null)
}