package Test01
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Dataset, SQLContext, SparkSession}
/*
 * Workflow note: to query data through the DataFrame or SQL API, first turn the
 * unstructured input into structured records, then register a view, run the
 * SQL, and trigger an action so the query actually executes.
 */

/**
 * One structured record of the comma-separated input, with one field per
 * column in column order.
 *
 * `empno`, `sal` and `deptno` are integers; every other column is kept as the
 * raw string read from the file.
 */
case class Emp(
  empno: Int,
  ename: String,
  job: String,
  mgr: String,
  hiredate: String,
  sal: Int,
  comm: String,
  deptno: Int
)
/**
 * Loads a comma-separated text file into a DataFrame by mapping every line to
 * an [[Emp]] case class instance, then prints the DataFrame.
 */
object SparkSql01 {
  /**
   * Reads the text file at `args(0)`, converts each line to an [[Emp]], turns
   * the resulting RDD into a DataFrame and shows it.
   *
   * @param args command-line arguments; `args(0)` is the input path
   * @throws IllegalArgumentException if no input path is supplied
   */
  def main(args: Array[String]): Unit = {
    // Fail fast with a readable message before any Spark resource is created.
    require(args.nonEmpty, "Usage: SparkSql01 <input-path>")

    val conf = new SparkConf().setAppName("SparkSql01").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      // SQLContext wraps the SparkContext and adds support for structured data.
      val sqlContext = new SQLContext(sc)
      // Implicit conversions that provide `toDF` on an RDD of case classes.
      import sqlContext.implicits._

      // Read the data source and split every line into its columns.
      val lines: RDD[Array[String]] = sc.textFile(args(0)).map(_.split(","))
      // Map the columns onto the Emp fields.
      val allEmp = lines.map(x =>
        Emp(x(0).toInt, x(1), x(2), x(3), x(4), x(5).toInt, x(6), x(7).toInt))

      val df1 = allEmp.toDF()
      df1.show()
    } finally {
      // Release the context even when reading, parsing or showing fails.
      sc.stop()
    }
  }
}
// Second approach: build the DataFrame from an RDD[Row] plus an explicit StructType schema.
package Test01
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Loads a comma-separated text file into a DataFrame by pairing an RDD of
 * `Row`s with an explicitly declared schema, then prints three of its columns.
 */
object SQLDemox2 {
  /**
   * Reads the text file at `args(0)`, converts each line to a `Row`, attaches
   * the schema and shows the `empno`, `ename` and `sal` columns.
   *
   * @param args command-line arguments; `args(0)` is the input path
   * @throws IllegalArgumentException if no input path is supplied
   */
  def main(args: Array[String]): Unit = {
    // Fail fast with a readable message before any Spark resource is created.
    require(args.nonEmpty, "Usage: SQLDemox2 <input-path>")

    // Entry point of a Spark RDD program.
    val conf = new SparkConf().setAppName("SQLDemox2").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      // SQLContext wraps the SparkContext and adds support for structured data.
      val sqlcontext = new SQLContext(sc)

      // Read the data source and split every line into its columns.
      val lines: RDD[Array[String]] = sc.textFile(args(0)).map(_.split(","))

      // Shape the data as Rows; the value types must line up with the schema below.
      val rowRDD: RDD[Row] = lines.map { line =>
        Row(line(0).toInt, line(1), line(2), line(3), line(4), line(5).toInt, line(6), line(7).toInt)
      }

      // Table structure: one StructField per column, in column order.
      val schema = StructType {
        List(
          StructField("empno", IntegerType),
          StructField("ename", StringType),
          StructField("job", StringType),
          StructField("mgr", StringType),
          StructField("hiredate", StringType),
          StructField("sal", IntegerType),
          StructField("comm", StringType),
          StructField("deptno", IntegerType)
        )
      }

      // Associate the Row RDD with the schema.
      val df: DataFrame = sqlcontext.createDataFrame(rowRDD, schema)
      val result = df.select("empno", "ename", "sal")
      result.show()
    } finally {
      // Release the context even when reading, parsing or showing fails.
      sc.stop()
    }
  }
}