Version
spark-2.1.3-bin-hadoop2.7
hive-site.xml
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://bigdata02:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
  </property>
  <property>
    <name>hive.insert.into.multilevel.dirs</name>
    <value>true</value>
    <description>Allow inserts to create multi-level directories</description>
  </property>
  <property>
    <name>hive.exec.stagingdir</name>
    <value>/home/tools/spark/spark-2.1.3-bin-hadoop2.7/hive_tmp</value>
    <description>Staging directory for temporary files</description>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs://bigdata02:9000/user/hive/warehouse</value>
  </property>
  <!--
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://bigdata02:9000</value>
  </property>
  -->
  <property>
    <name>hive.server2.thrift.bind.host</name>
    <value>bigdata01</value>
    <description>Host for the HiveServer2 Thrift service to bind to</description>
  </property>
  <property>
    <name>hive.server2.thrift.port</name>
    <value>10001</value>
    <description>Port for the HiveServer2 Thrift service</description>
  </property>
  <!-- Grant the anonymous user the admin role (highest privileges) -->
  <property>
    <name>hive.users.in.admin.role</name>
    <value>anonymous</value>
  </property>
</configuration>
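Because the metastore connection above uses com.mysql.jdbc.Driver, the MySQL connector jar must be visible to Spark. A minimal sketch (the jar version and source path are assumptions):

# Assumed jar name/version; any MySQL Connector/J 5.x jar provides com.mysql.jdbc.Driver.
cp mysql-connector-java-5.1.47.jar $SPARK_HOME/jars/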
spark-defaults.conf
spark.yarn.jars hdfs://bigdata01:9000/home/hadoop/spark_jars/*
spark.sql.adaptive.enabled true
spark-env.sh
export JAVA_HOME=/usr/local/jdk/jdk1.8.0_251
export HADOOP_HOME=/home/tools/hadoop/hadoop-2.7.3
export HADOOP_CONF_DIR=/home/tools/hadoop/hadoop-2.7.3/etc/hadoop
export YARN_CONF_DIR=/home/tools/hadoop/hadoop-2.7.3/etc/hadoop
export SPARK_HOME=/home/tools/spark/spark-2.1.3-bin-hadoop2.7
# Place the Hadoop configuration files under $SPARK_HOME/conf.
Upload the jars shipped with Spark (under $SPARK_HOME/jars in Spark 2.x) to the directory specified in spark-defaults.conf: hdfs://bigdata01:9000/home/hadoop/spark_jars/*
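A minimal sketch of these two steps, using the paths configured above:

# Copy the Hadoop client configs so Spark can find HDFS and YARN.
cp $HADOOP_CONF_DIR/core-site.xml $HADOOP_CONF_DIR/hdfs-site.xml $SPARK_HOME/conf/
# Create the HDFS directory and upload the jars to match spark.yarn.jars.
hdfs dfs -mkdir -p /home/hadoop/spark_jars
hdfs dfs -put $SPARK_HOME/jars/* /home/hadoop/spark_jars/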
Start
# --master yarn-client : run on the YARN cluster in client mode
# --num-executors      : number of executors
# --executor-memory    : memory per executor
# --executor-cores     : CPU cores per executor
./spark-sql \
--master yarn-client \
--num-executors 3 \
--executor-memory 3G \
--executor-cores 3
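Once the shell is up, a quick sanity check that the metastore is reachable:

show databases;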
Start via the Thrift server
$SPARK_HOME/sbin/start-thriftserver.sh \
--master yarn \
--num-executors 3 \
--executor-memory 3G \
--executor-cores 3
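To confirm the server is listening on the port configured in hive-site.xml (a sketch; the exact tooling varies by host):

netstat -anp | grep 10001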
Connect to Spark SQL
$SPARK_HOME/bin/beeline -u jdbc:hive2://bigdata01:10001/
Create a table and load data
create table data_table (
  id int,
  group1 string,
  group2 string,
  value1 int,
  value2 int,
  value3 int,
  `date` string
)
row format delimited fields terminated by ','
stored as textfile;

load data inpath 'hdfs://bigdata01:9000/tempdata/sparksql' into table data_table;
Run queries
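For example, an aggregation over the columns defined above (the query itself is only an illustration):

select group1, group2, sum(value1) as total_value1
from data_table
group by group1, group2;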