/**
 * Walk-through of common Spark SQL Dataset/DataFrame operations: projection,
 * aliasing, filtering, column arithmetic, sorting, counting, aggregation,
 * joins, cross join and column renaming.
 *
 * Every result is printed to stdout via `show()` / `println`.
 */
object DataSetDemo1 {

  /**
   * Runs the demo against hard-coded local input files.
   *
   * Input expectations visible in the code:
   *  - `person2.txt`: comma-separated lines with at least three fields, where the
   *    second and third fields parse as `Int`.
   *  - `student.txt`: comma-separated lines `<int>,<string>`.
   *  - `weigth.txt`: comma-separated lines `<int>,<float>`.
   * A line that does not match makes the corresponding Spark job fail.
   *
   * The session is stopped in a `finally` block so it is released even when one
   * of the jobs fails.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder()
      .master("local")
      // The runtime class of a Scala `object` is named "DataSetDemo1$";
      // strip the trailing '$' so the application name is clean.
      .appName(this.getClass.getSimpleName.stripSuffix("$"))
      .getOrCreate()

    try {
      import session.implicits._

      val dataSet: Dataset[String] = session.read.textFile("D:\\abc\\person\\input\\person2.txt")

      // Parse each CSV line into a Student.
      // NOTE(review): Student is declared outside this block; the queries below
      // assume it exposes columns `name`, `age` and `score` — confirm.
      val ds = dataSet.map(line => {
        val splits = line.split(",")
        Student(splits(0), splits(1).toInt, splits(2).toInt)
      })

      // Building a Dataset from an RDD. `ds2` is not used afterwards and no
      // action is invoked on it; it is kept to illustrate `createDataset`.
      val rdd: RDD[String] = session.sparkContext.makeRDD(Array("aa","bb","cc"))
      val ds2: Dataset[String] = session.createDataset(rdd)

      // Projection: by column name and by Column expression.
      ds.select("name","age").show()
      ds.select($"name",$"age").show()

      // Aliasing a column.
      ds.selectExpr("name as newname").show()

      // Filtering, e.g. peopleDs.where($"age" > 15); `where` and `filter` are used
      // here with the same predicate.
      ds.where($"age" > 18).show()
      ds.filter($"age" > 18).show()

      // Add 1 to age, e.g. ds.select(expr("value + 1").as[Int])
      import org.apache.spark.sql.functions._
      ds.select(expr("age + 1").as[Int]).show()
      ds.select($"age" + 1).show()

      // Sorting. Other forms:
      //ds.sort("sortcol")
      //ds.sort($"sortcol")
      //ds.sort($"sortcol".asc)
      // Sort by age descending, then by score.
      ds.sort($"age".desc,$"score").show()

      // Total number of rows.
      println(ds.count())

      // Aggregations grouped by name: sum of score, and row count.
      ds.groupBy("name").agg(sum("score")).show()
      ds.groupBy("name").count().show()

      // Two DataFrames used for the join examples: (id, name) and (uid, weight).
      val df1 = session.read.textFile("D:\\abc\\join\\student.txt")
        .map(line => {
          val splits = line.split(",")
          (splits(0).toInt, splits(1))
        }).toDF("id", "name")
      val df2 = session.read.textFile("D:\\abc\\join\\weigth.txt")
        .map(line => {
          val splits = line.split(",")
          (splits(0).toInt, splits(1).toFloat)
        }).toDF("uid", "weight")

      // Join when the key column has the same name on both sides:
      //df1.join(df2, "user_id")
      //df1.join(df2,"id").show()
      // Join when the key column names differ:
      //df1.join(df2, $"df1Key" === $"df2Key")
      //df1.join(df2).where($"df1Key" === $"df2Key")
      // Note: column equality is written with three equals signs (===).
      df1.join(df2,$"id"=== $"uid").show()

      // The join type defaults to inner; pass a third argument to change it:
      //df1.join(df2, $"df1Key" === $"df2Key", "outer")
      //`inner`, `cross`, `outer`, `full`, `full_outer`, `left`, `left_outer`,
      // `right`, `right_outer`, `left_semi`, `left_anti`.
      df1.join(df2,$"id" === $"uid","left_outer").show()

      // Cartesian product.
      df1.crossJoin(df2).show()

      // Renaming a column.
      df1.withColumnRenamed("id","newid").show()
    } finally {
      // Always release the session, even if a job above threw.
      session.stop()
    }
  }
}
// Source: CSDN
// Author: 冯。
// Link: https://blog.csdn.net/manweizhizhuxia/article/details/104193727