Solution: the MySQL JDBC driver jar does not match, so copy the mysql connector jar that Hive uses into Spark's jars directory.
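A minimal sketch of that copy, assuming Hive is installed under /usr/hive and Spark under /usr/spark/spark-2.1.1-bin-hadoop2.7 (the Hive lib path and the exact connector jar name are assumptions; adjust them to your installation):

# copy the MySQL JDBC driver that Hive already uses into Spark's jars directory
cp /usr/hive/lib/mysql-connector-java-*.jar /usr/spark/spark-2.1.1-bin-hadoop2.7/jars/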
package com.cmd.spark

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

object Spark5 {
  def main(args: Array[String]): Unit = {
    // 1. Spark configuration object
    val conf = new SparkConf().setAppName("spark_hive")
    // 2. Build a SparkSession with Hive support (takes the place of the old HiveContext)
    val hiveContext = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate()
    // 3. Create the Hive table
    val createSql = "create table if not exists t_student(id int, name string) row format delimited fields terminated by ','"
    hiveContext.sql(createSql)
    // 4. Load the local data file into the table
    val loadData = "load data local inpath '/student.log' into table t_student"
    hiveContext.sql(loadData)
    // 5. Read the Hive table into a Spark SQL DataFrame
    val df: DataFrame = hiveContext.sql("select * from t_student")
    // 6. Filter: keep only the rows with id > 2
    val result2 = df.where("id > 2")
    // 7. Register the result as a temp view and write it back to Hive with SQL
    result2.createOrReplaceTempView("result01")
    hiveContext.sql("create table if not exists tmp_student as select * from result01")
    hiveContext.stop()
  }
}
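Once the job has run, a quick sanity check from the hive or spark-sql shell (assuming the data load and the job both succeeded):

-- tmp_student should only contain the rows of t_student with id > 2
select * from tmp_student;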
Package the application into a jar.
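For example, with a Maven project this is just the standard package phase (Maven is an assumption here; use sbt package instead if the project is built with sbt):

mvn clean package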
Configure the environment: copy Hive's configuration file into Spark's conf directory.
cp hive-site.xml /usr/spark/spark-2.1.1-bin-hadoop2.7/conf/
In that hive-site.xml, set hive.metastore.schema.verification to false.
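The corresponding property entry in hive-site.xml would look like this (a sketch of just the single setting named above):

<!-- skip the metastore schema version check so Spark can talk to the Hive metastore -->
<property>
  <name>hive.metastore.schema.verification</name>
  <value>false</value>
</property>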
Submit to the cluster:
spark-submit --master <local | spark://host:port | yarn> --class <main class> <path to jar>
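A concrete invocation sketch, assuming the packaged jar is named spark5.jar and sits in the current directory (both are assumptions):

spark-submit --master yarn --class com.cmd.spark.Spark5 ./spark5.jar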