RDD & Dataset & DataFrame
Dataset创建 object DatasetCreation { def main(args: Array[String]): Unit = { val spark = SparkSession .builder() .appName("SparkSessionTest") .getOrCreate() import spark.implicits._ //1: range val ds1 = spark.range(0, 10, 2, 2) ds1.show() val dogs = Seq(Dog("jitty", "red"), Dog("mytty", "yellow")) val cats = Seq(new Cat("jitty", 2), new Cat("mytty", 4)) //2: 从Seq[T]中创建 val data = dogs val ds = spark.createDataset(data) ds.show() //3: 从RDD[T]中创建 val dogRDD = spark.sparkContext.parallelize(dogs) val dogDS = spark.createDataset(dogRDD) dogDS.show() val catRDD = spark.sparkContext.parallelize(cats)