1. Creating a DataFrame via reflection (case class)

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}

case class People(name: String, age: Int)
val conf = new SparkConf().setMaster("local[2]").setAppName("Create DataFrame via reflection")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)

val file: RDD[String] = sc.textFile("E://demo.txt")
val peopleRdd: RDD[People] = file.map(_.split(" ")).map(p => People(p(0), p(1).toInt))

import sqlContext.implicits._
val peopleDF: DataFrame = peopleRdd.toDF()
peopleDF.show()
peopleDF.createOrReplaceTempView("people")
val frame: DataFrame = sqlContext.sql("select * from people")
frame.show()
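This example assumes E://demo.txt holds one space-separated name and age per line, for example (sample values only):

zhangsan 20
lisi 31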
2. Creating a DataFrame with StructType
val conf: SparkConf = new SparkConf().setAppName("Create DataFrame with StructType").setMaster("local[2]")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val file: RDD[String] = sc.textFile("E://demo.txt")
val rowRDD = file.map(_.split(" ")).map(x => Row(x(0), x(1).toInt))

val schema = StructType(
  // Equivalent: List(StructField("name", StringType, true), StructField("age", IntegerType, true))
  StructField("name", StringType, true) ::
  StructField("age", IntegerType, true) :: Nil
)
val peopleDF: DataFrame = sqlContext.createDataFrame(rowRDD, schema)
peopleDF.show()
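On Spark 2.x and later, the same StructType approach is usually written against SparkSession rather than SQLContext. The following is a minimal sketch under that assumption (the app name and master setting are placeholders):

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Build a SparkSession instead of SparkContext + SQLContext
val spark = SparkSession.builder()
  .master("local[2]")
  .appName("StructType with SparkSession")
  .getOrCreate()

// Parse the same space-separated file into Row objects
val rowRDD = spark.sparkContext.textFile("E://demo.txt")
  .map(_.split(" "))
  .map(x => Row(x(0), x(1).toInt))

val schema = StructType(
  StructField("name", StringType, true) ::
  StructField("age", IntegerType, true) :: Nil
)

val peopleDF = spark.createDataFrame(rowRDD, schema)
peopleDF.show()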
3. Creating a DataFrame by loading JSON files, CSV files, JDBC connections, and other sources
val conf: SparkConf = new SparkConf().setAppName("Create DataFrame from a JSON file").setMaster("local[2]")
val sc = new SparkContext(conf)
sc.setLogLevel("WARN")
val sqlContext = new SQLContext(sc)

// Load option 1
val frame: DataFrame = sqlContext.read.json("E://people.json")
// Load option 2
// val frame: DataFrame = sqlContext.read.format("json").load("E://people.json")
// val frame: DataFrame = sqlContext.read.parquet("E://people.parquet")
frame.createOrReplaceTempView("people")
sqlContext.sql("select * from people").show()

// Save options
frame.write.json("E://test.json")
frame.write.csv("E://test.csv")
frame.write.parquet("E://test.parquet")
frame.write.format("json").save("E://test1.json")
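Note that each of these write calls fails if the target path already exists; a save mode can be set first. A small sketch (overwrite and append are chosen here only for illustration):

import org.apache.spark.sql.SaveMode

// Overwrite the JSON output directory if it already exists
frame.write.mode(SaveMode.Overwrite).json("E://test.json")
// Append to an existing Parquet dataset
frame.write.mode("append").parquet("E://test.parquet")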
Loading data via JDBC
val conf = new SparkConf().setMaster("local[2]").setAppName("Load a JDBC data source")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)

import java.util.Properties

val url = "jdbc:mysql://ip:3306/mk"
val table = "user"
val properties = new Properties()
properties.setProperty("user", "youruser")
properties.setProperty("password", "yourpassword")
properties.setProperty("driver", "com.mysql.jdbc.Driver")

// Option 1
val df = sqlContext.read.jdbc(url, table, properties)
// Option 2 (SparkSession API)
// val jdbcDF = spark.read.format("jdbc")
//   .option("url", "jdbc:mysql://localhost:3306/***")  // *** is the database name
//   .option("driver", "com.mysql.jdbc.Driver")
//   .option("dbtable", "****")                          // **** is the table name
//   .option("user", "*****")
//   .option("password", "*****")
//   .load()
df.createOrReplaceTempView("dbs")
sqlContext.sql("select count(1) from dbs").show()
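The reverse direction works the same way: a DataFrame can be written back over JDBC with the same url and connection properties. A minimal sketch, assuming a hypothetical target table named user_copy:

import org.apache.spark.sql.SaveMode

// Append the DataFrame rows into the (hypothetical) user_copy table
df.write.mode(SaveMode.Append).jdbc(url, "user_copy", properties)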