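<dependency>
  <groupId>org.scala-lang</groupId>
  <artifactId>scala-library</artifactId>
  <version>${scala.version}</version>
</dependency>
<dependency>
  <groupId>junit</groupId>
  <artifactId>junit</artifactId>
  <version>4.4</version>
  <scope>test</scope>
</dependency>
<dependency>
  <groupId>org.specs</groupId>
  <artifactId>specs</artifactId>
  <version>1.2.5</version>
  <scope>test</scope>
</dependency>
<dependency>
  <groupId>com.oracle</groupId>
  <artifactId>ojdbc6</artifactId>
  <version>11.2.0.3</version>
</dependency>
<dependency>
  <groupId>mysql</groupId>
  <artifactId>mysql-connector-java</artifactId>
  <version>${mysql.version}</version>
</dependency>
<dependency>
  <groupId>com.alibaba</groupId>
  <artifactId>druid</artifactId>
  <version>${druid.version}</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-core_2.11</artifactId>
  <version>${spark.verson}</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-streaming_2.11</artifactId>
  <version>${spark.verson}</version>
  <scope>provided</scope>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_2.11</artifactId>
  <version>${spark.verson}</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-hive_2.11</artifactId>
  <version>${spark.verson}</version>
</dependency>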
2.代码
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext
/**
 * Example Spark job that uses Hive tables as its data source.
 *
 * The job recreates two Hive tables, `default.student_infos` (name, age) and
 * `default.student_scores` (name, score), loads a comma-delimited text file into each,
 * joins them on `name`, and shows the students whose score is greater than 80.
 *
 * An explicit `main` is used instead of `scala.App` so that no logic depends on
 * `App`'s delayed initialization.
 */
object HiveDataSource {

  /**
   * Entry point: runs the whole load-and-query pipeline against a local Spark master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val config = new SparkConf().setAppName("HiveDataSource").setMaster("local")
    val sc = new SparkContext(config)
    try {
      val sqlContext = new HiveContext(sc)

      // Recreate student_infos from scratch and load its data file.
      // NOTE(review): without the LOCAL keyword, Hive presumably resolves the path on the
      // configured default filesystem and moves (not copies) the file — confirm before re-running.
      sqlContext.sql("drop table if exists default.student_infos")
      sqlContext.sql(
        "create table if not exists default.student_infos (name string,age int) row format delimited fields terminated by ',' stored as textfile")
      sqlContext.sql("load data inpath '/tmp/student_infos.txt' into table default.student_infos")

      // Import data into student_scores in the same way.
      sqlContext.sql("DROP TABLE IF EXISTS default.student_scores")
      sqlContext.sql(
        "create table if not exists default.student_scores (name string,score int) row format delimited fields terminated by ',' stored as textfile")
      sqlContext.sql("load data inpath '/tmp/student_scores.txt' into table default.student_scores")

      // Join the two tables and keep only the students whose score is greater than 80.
      val goodStudentDf = sqlContext.sql(
        "select t1.name,t1.age,t2.score from default.student_infos t1 join default.student_scores t2 on t1.name = t2.name where t2.score > 80")
      goodStudentDf.show()
    } finally {
      // Always release the SparkContext, even when one of the SQL statements fails.
      sc.stop()
    }
  }
}