/**
 * Basic SparkSQL-on-Hive operations.
 *
 * Goal: replay the earlier JDBC-style workflow, but with Hive as the data source.
 * Input files (comma-delimited):
 *   teacher_basic.txt -> name,age,married,children
 *   teacher_info.txt  -> name,height
 * Desired result: teacher_basic LEFT JOIN teacher_info on name, producing
 * (name, age, married, height, children), saved to the Hive table
 * spark_hive.teachers.
 */
object ScalaSparkSQLHiveOps {

  /** Entry point. Explicit main() instead of `extends App` to avoid the
    * delayed-initialization pitfalls of the App trait. */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ScalaSparkSQLHiveOps")
    /*
     * NOTE(review): the master is hard-coded because, per the original author,
     * submitting with `--master spark://master:7077` on the spark-submit
     * command line did not work for this job while setting it programmatically
     * did. Prefer the submit-time flag once the root cause is understood.
     */
    conf.setMaster("spark://master:7077")

    val sc = new SparkContext(conf)
    val hiveContext = new HiveContext(sc)

    try {
      // step 1: create the target database and the two source tables in Hive.
      hiveContext.sql("create database if not exists spark_hive")
      // columns: name,age,married,children
      hiveContext.sql("create table if not exists spark_hive.teacher_basic(name string, age int, married boolean, children int) row format delimited fields terminated by ','")
      // columns: name,height
      hiveContext.sql("create table if not exists spark_hive.teacher_info(name string, height int) row format delimited fields terminated by ','")

      // step 2: (re)load the local data files into the two tables.
      hiveContext.sql("load data local inpath '/opt/data/spark/sql/teacher_basic.txt' overwrite into table spark_hive.teacher_basic")
      hiveContext.sql("load data local inpath '/opt/data/spark/sql/teacher_info.txt' overwrite into table spark_hive.teacher_info")

      // step 3: left-join the two tables on name
      // -> (name, age, married, height, children).
      val joinedDF = hiveContext.sql("select tb.name, tb.age, tb.married, ti.height, tb.children from spark_hive.teacher_basic tb left join spark_hive.teacher_info ti on tb.name = ti.name")

      // step 4: persist the joined result as a managed Hive table.
      joinedDF.write.saveAsTable("spark_hive.teachers")
    } finally {
      // Always release cluster resources, even if a query above fails.
      sc.stop()
    }
  }
}