Spark SQL 读取 HDFS

/**
 * Small Spark SQL example: loads two text files through the JSON data source
 * into DataFrames and prints their leading rows to stdout.
 *
 * <p>One input is read from HDFS, the other from the local working directory.
 * The job runs with master {@code local[2]} and application name {@code jsonfile}.
 */
public class SparkSqlBathLog {

    /**
     * HDFS input read with the JSON data source. The literal must not carry
     * trailing whitespace: the string is handed to the file system as-is, so a
     * stray space would become part of the file name being looked up.
     */
    private static final String HDFS_JSON_PATH = "hdfs://node1:9000/aaa/ssooo.txt";

    /** Local input, resolved relative to the working directory. */
    private static final String LOCAL_JSON_PATH = "./test.txt";

    /** Maximum number of rows printed per DataFrame. */
    private static final int ROWS_TO_SHOW = 30;

    /**
     * Entry point. Creates the Spark context, shows both DataFrames and always
     * stops the context afterwards.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setMaster("local[2]").setAppName("jsonfile");

        // Spark context
        SparkContext sc = new SparkContext(conf);
        try {
            // Build the SQLContext on top of the Spark context
            SQLContext sqlContext = new SQLContext(sc);

            // Alternative: load the file from HDFS as plain text lines
            // RDD<String> stringRDD = sc.textFile("hdfs://192.168.48.11:9000/aaa/ssooo.txt");

            DataFrame df = sqlContext.read().json(HDFS_JSON_PATH);
            df.show(ROWS_TO_SHOW);

            // Alternative: load with the default data source
            // DataFrame dfa = sqlContext.read().load("hdfs://192.168.48.11:9000/aaa/ssooo.txt");
            // dfa.show(30);
            // sqlContext.sql("use hive");
            // df.show(20);

            DataFrame df1 = sqlContext.read().format("json").load(LOCAL_JSON_PATH);
            df1.show(ROWS_TO_SHOW);

            // Alternative: write the result out as a single partition
            // df1.coalesce(1).write().format("String").save("hive path");

            // Alternative: drop the _corrupt_record column and inspect the "table" column
            // DataFrame corrupt_record = df.drop("_corrupt_record");
            // corrupt_record.show(20);
            // DataFrame table = corrupt_record.select("table");
            // table.na().drop("all").show(20);
            // JavaRDD<Row> rowJavaRDD = table.toJavaRDD();
            // df1.show(20);
            // df.show();
        } finally {
            // Stop the context even when a read or show call throws,
            // so the local Spark runtime is not left running.
            sc.stop();
        }
    }
}

来源：https://www.cnblogs.com/Mr--zhao/p/12212746.html

标签

HDFS

dataframe

易学教程内所有资源均来自网络或用户发布的内容，如有违反法律规定的内容欢迎反馈！
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!