hive2.3.x的配置和使用

最新推荐文章于 2024-05-02 17:50:18 发布

零度爱情12138

最新推荐文章于 2024-05-02 17:50:18 发布

阅读量849

点赞数 1

分类专栏：大数据 文章标签：大数据

本文链接：https://blog.csdn.net/u012421093/article/details/105848999

版权

大数据专栏收录该内容

4 篇文章 0 订阅

订阅专栏

准备工作

JDK
hadoop
hive

执行引擎

引擎	说明
mr	默认引擎，hadoop自带的框架，在2.x版本中已不推荐使用
tez	相比于mr，减少了磁盘io，速度比mr有明显提升
spark	内存计算框架，速度最快

运行模式

模式	说明	特点
内嵌模式	数据保存在内嵌的 derby 数据库中	不支持多用户登录
本地模式	数据保存在本地的数据库，如mysql	支持多用户登录
远程模式	数据保存在远程的数据库中，如mysql	多个hive客户端共用同一套元数据，节省资源

开始使用

解压hive，进入conf目录

hive-env.sh.template 名称更改为 hive-env.sh

# 配置自己的hadoop路径
HADOOP_HOME=/opt/module/hadoop-2.10.0/
# 指定配置文件路径
export HIVE_CONF_DIR=/opt/module/apache-hive-2.3.7-bin/conf/
# 指定jar包所在目录，默认读取当前安装目录下的lib
export HIVE_AUX_JARS_PATH=/opt/module/apache-hive-2.3.7-bin/lib/

启动hdfs

sbin/start-dfs.sh

启动yarn

sbin/start-yarn.sh

在hdfs上创建hive所需的文件夹和权限

# 创建文件夹
hadoop fs -mkdir /tmp
hadoop fs -mkdir -p /user/hive/warehouse
# 给予组写权限
hadoop fs -chmod g+w /tmp
hadoop fs -chmod g+w /user/hive/warehouse

启动hive

# 初始化，如果报错，删除安装目录下的metastore_db目录后重新执行；这里使用的是内嵌模式
bin/schematool -dbType derby -initSchema
# 启动
bin/hive

配置本地模式（远程模式也适用）

hive-default.xml.template 重命名为 hive-site.xml

<!-- 修改以下属性 -->
<configuration>
	<!-- mysql地址 -->
	<property>
    	<name>javax.jdo.option.ConnectionURL</name>
    	<value>jdbc:mysql://192.168.75.136:3306/mydb?createDatabaseIfNotExist=true</value>
  	</property>
  	<!-- 驱动类 -->
  	 <property>
    	<name>javax.jdo.option.ConnectionDriverName</name>
    	<value>com.mysql.cj.jdbc.Driver</value>
  	</property>
  	<!-- mysql登陆用户名  -->
  	<property>
    	<name>javax.jdo.option.ConnectionUserName</name>
    	<value>root</value>
  	</property>
  	<!-- mysql登录密码 -->
  	 <property>
    	<name>javax.jdo.option.ConnectionPassword</name>
    	<value>mine</value>
  	</property>
  	<!-- 指定io的缓存目录 -->
  	<property>
    	<name>hive.exec.local.scratchdir</name>
    	<value>/opt/module/apache-hive-2.3.7-bin/iotmp/atguigu</value>
  </property>
  <!-- 指定资源文件目录 -->
  <property>
    	<name>hive.downloaded.resources.dir</name>
    	<value>/opt/module/apache-hive-2.3.7-bin/iotmp/${hive.session.id}_resources</value>
  	</property>
</configuration>

复制mysql驱动jar包到hive安装目录下的 lib 目录下
执行初始化命令

bin/schematool -dbType mysql -initSchema

执行成功后，mysql的 mydb 库中会生成hive的元数据表

复杂数据类型

名称	示例
array	array<string>
map	map<key, value>
struct	struct<name:string, age:int>

建表语句

# create table <table_name>(<name> <type>,<name> <type>);
# 基本建表语句
create table student(id int,name string);
# 复杂建表语句，如下数据的建表示例
# ----------------------
# --- mayun,  # 姓名 马云
# --- ma hua teng_luo yong hao,  # 朋友  马化腾 罗永浩 用下划线'_'分隔多条
# --- xiao ma yun:24_xiao xiao ma yun:12,  # 孩子 小马云 小小马云 键值对 用冒号':'表示 用下划线'_'分隔多条
# --- tian an men_beijing   # 住址 北京天安门 用struct复合体 用下划线'_'分隔
# -----------------------
create table student1(name string,
					 friends array<string>,
					 children map<string,int>,
					 address struct<street:string, city:string>
					 )
# 用逗号分隔数据
row format delimited fields terminated by ','
# 表示多个数据时，使用下划线分隔
collection items terminated by '_'
# map的分隔符号，冒号左边key，右边value
map keys terminated by ':'
# 每行以换行符结束
lines terminated by '\n';

测试

# 测试文件，存入txt文件
mayun,ma hua teng_luo yong hao,xiao ma yun:24_xiao xiao ma yun:12,tian an men_beijing
# 上传至hdfs目录
hdfs dfs -put test.txt /user/hive/warehouse/student1
# 查询导入的数据
select * from student1;

结果
复杂结构的取值

# 取出数组，键值对，结构体中的数据
select friends[1],children['xiao ma yun'],address.city from student1;

常用的sql语句

< > 表示必填，[ ] 表示可选

查询详情

desc database <database_name>
desc [extended] <table_name>
desc [formatted] <table_name>
# extended 详细信息
# formatted 格式信息（表类型等）

建数据库

create database [if not exists] <database_name> [location <'path'>];

建表

create [external] table [if not exists] <table_name> # 表名
(<col_name> <data_type> [comment col_name_comment] [, ...]) # 字段类型
[comment table_comment] # 表注释
[partitioned by (<col_name> <data_type> [comment col_name_comment]) [, ...]] # 分区
[location <'path'>] # 表数据的存储路径
# external 外部表
# partitioned 分区

# sql的查询结果建表
create table <table_name> as <sql>;
# 复制其他表的结构(不包括数据)
create table <table_name> like <table_name>;

插入语句

insert <into|overwrite> <table_name> [partition (col_name="value")] values (<value> [, ...]); 

insert <into|overwrite> <table_name> [partition (col_name="value")] <sql>

导入数据

load data [local] inpath <'path'> [overwrite] into table <table_name> [partition (col_name="value")];

import table <table_name> from <'path'>;

导出数据

insert overwrite [local] directory <'path'> [row format delimited fields terminated by <'symbol'>] <sql>;

export table <table_name> to <'path'>;

修改表

# 修改表名
alter table <table_name> rename to <new_table_name>;

# 修改列
alter table <table_name> change [column] <old_col_name> <new_col_name> <data_type> [comment col_name_comment];

# 添加列
alter table <table_name> add columns (<col_name> <data_type> [comment col_name_comment]);

# 替换列
alter table <table_name> replace columns (<col_name> <data_type> [comment col_name_comment]);

# 转换为内部表   **'EXTERNAL'='FALSE' 固定写法，区分大小写与双引号**
alter table <table_name> set tblproperties('EXTERNAL'='FALSE');

# 转换为外部表  **'EXTERNAL'='TRUE' 固定写法，区分大小写与双引号**
alter table <table_name> set tblproperties('EXTERNAL'='TRUE');

# 新建分区
alter table <table_name> add partition(name="value") [partition(name="value") ...];
# 删除分区
alter table <table_name> drop partition(name="value") [, partition(name="value") ...];

清空表数据

# 只能删除内部表数据，不能删除外部表数据
truncate table <table_name>;

常用hive指令

hive -e <sql> [> <result_file>]
hive -f <file_name> [> <result_file>]

# 在hive shell中查看hdfs文件系统
dfs -ls /
# 在hive shell中查看本地文件系统
! ls /opt
# hive的指令记录
cat /home/{user}/.hivehistory

# 手动添加分区和数据，产生关联关系，方式一
msck repair table <table_name>;
# 手动添加分区和数据，产生关联关系，方式二
alter table <table_name> add partition(name="value");
# 手动添加分区和数据，产生关联关系，方式三
load data [local] inpath <'path'> [overwrite] into table <table_name> [partition (col_name="value")];

其他实用配置

<configuration>
	<!-- 显示当前db名称 -->
  	<property>
    	<name>hive.cli.print.current.db</name>
    	<value>true</value>
  	</property>
  	<!-- 查询结果中显示字段信息 -->
  	<property>
    	<name>hive.cli.print.header</name>
    	<value>true</value>
  	</property>
  	<!-- 数据仓位置，默认在hdfs文件系统下 -->
  	<property>
    	<name>hive.metastore.warehouse.dir</name>
    	<value>/user/hive/warehouse</value>
  	</property>
</configuration>

配置日志文件

hive日志

# hive-log4j2.properties.template 修改为 hive-log4j2.properties
property.hive.log.dir = /opt/module/apache-hive-2.3.7-bin/logs

beeline日志，默认打印到控制台

# beeline-log4j2.properties.template  修改为 beeline-log4j2.properties

参数动态配置的方式

# 当次有效
bin/hive --hiveconf <property=value>
# hive shell  当次有效
set <property=value>;

零度爱情12138

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
hive2.3.x的配置和使用

准备工作JDKhadoophive开始使用解压hive，进入conf目录，配置hadoophive-env.sh.template 名称更改为 hive-env.sh配置自己的hadoop路径和配置文件路径HADOOP_HOME=/opt/module/hadoop-2.10.0/export HIVE_CONF_DIR=/opt/module/apache-hive-2.3...
复制链接

扫一扫