Spark中DataSet使用实例
package nj.zb.kb11
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
/**
 * Minimal example of building typed Spark `Dataset`s from in-memory Scala collections.
 */
object DataSetDemo {

  /** A labelled pair of coordinates; element type of the first example Dataset. */
  case class Point(label: String, x: Double, y: Double)

  /** An id/name pair; element type of the second example Dataset. */
  case class Category(id: Long, name: String)

  /**
   * Starts a local Spark session, converts two local sequences into Datasets,
   * and stops the session again.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("DataSetDemo")

    // A single configuration drives the session. The SparkContext is created by the
    // builder from this conf instead of being instantiated separately beforehand.
    val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()

    try {
      // Provides the implicit encoders and the `toDS()` extension used below.
      import spark.implicits._

      val points = Seq(Point("njzb", 23.23, 48.74), Point("njnz", 26.12, 48.77))
      val pointsDS: Dataset[Point] = points.toDS()

      val categories = Seq(Category(1, "njzb"), Category(2, "njnz"))
      val categoriesDS: Dataset[Category] = categories.toDS()
    } finally {
      // Always release the session (and its underlying context), even on failure.
      spark.stop()
    }
  }
}
Spark中DataFrame使用实例
package nj.zb.kb11
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.sql.types._
/**
 * Example of building a `DataFrame` from a text file with a programmatically
 * specified schema, querying it through Spark SQL, and reading a Parquet dataset.
 */
object DataFrameDemo {

  /**
   * Runs the demo against a local Spark session.
   *
   * Reads `in/people.txt` (space-separated fields), exposes it as the temporary view
   * `people`, prints the names of rows matching `age > 35`, then prints the schema
   * and contents of the Parquet data under `out/parquettest`.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("sparkreadcsv")

    // A single configuration drives the session; the SparkContext is taken from the
    // session rather than being created separately.
    val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()

    try {
      val sc = spark.sparkContext
      val people: RDD[String] = sc.textFile("in/people.txt")

      // Every column is declared as a nullable string, named after the header tokens.
      val schemaString = "id name age"
      val schema: StructType = StructType(
        schemaString.split(" ").map(name => StructField(name, StringType, nullable = true))
      )

      // Lines with fewer than three fields (e.g. a trailing blank line) are skipped;
      // indexing them would otherwise fail the task with ArrayIndexOutOfBoundsException.
      val rows: RDD[Row] = people
        .map(_.split(" "))
        .filter(_.length >= 3)
        .map(cols => Row(cols(0), cols(1), cols(2)))

      val peopleDF: DataFrame = spark.createDataFrame(rows, schema)
      peopleDF.printSchema()
      peopleDF.show()

      peopleDF.createOrReplaceTempView("people")
      // `age` is a string column under this schema, so the numeric comparison relies on
      // Spark SQL's implicit cast. NOTE(review): confirm that is the intended semantics.
      val olderThan35: DataFrame = spark.sql("select name from people where age > 35")
      olderThan35.show()

      // Reads Parquet data from a relative path; nothing in this object writes it,
      // so it must already exist when the demo runs.
      val parquetDF: DataFrame = spark.read.parquet("out/parquettest")
      parquetDF.printSchema()
      parquetDF.show()
    } finally {
      // Always release the session (and its underlying context), even on failure.
      spark.stop()
    }
  }
}