源码编译Tez整合Hive,提升Hive执行效率

源码编译Tez基于Hadoop3.3.1

热身运动

  • 搭建java环境
# Install the Java environment (JDK 8)
mkdir /usr/java
# tar xf jdk-8u261-linux-x64.tar.gz -C /opt/
ln -s /opt/jdk1.8.0_261 /usr/java/default

# The heredoc delimiter is unquoted, so $ must be escaped below; otherwise
# $PATH/$JAVA_HOME are expanded NOW (while JAVA_HOME is still unset) instead
# of at login time when the profile script is sourced.
cat > /etc/profile.d/java.sh <<-EOF
#!/bin/bash
export JAVA_HOME=/usr/java/default
export PATH=\$PATH:\$JAVA_HOME/bin
EOF
source /etc/profile

java -version   # fixed typo: was "java -verison"
  • 安装maven
#!/bin/bash
# Download Apache Maven 3.6.3, install it under /usr/local and expose it on PATH.
install_maven(){
  local source_url='https://mirrors.tuna.tsinghua.edu.cn/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz'
  # Abort early if the download fails instead of extracting a partial archive.
  cd ~ && wget "${source_url}" -O apache-maven-3.6.3-bin.tar.gz || return 1
  tar xvf apache-maven-3.6.3-bin.tar.gz -C /usr/local/
if [ -d /usr/local/apache-maven-3.6.3 ];then
# \$ escapes keep PATH/M2_HOME literal in the generated profile script.
cat > /etc/profile.d/maven.sh <<-EOF
#!/bin/bash
export M2_HOME=/usr/local/apache-maven-3.6.3
export PATH=\$PATH:\$M2_HOME/bin
EOF
  chmod 0744 /etc/profile.d/maven.sh
  source /etc/profile.d/maven.sh
  rm -f ~/apache-maven-3.6.3-bin.tar.gz
fi
}

install_maven

# Write a Maven settings.xml that routes dependency downloads through the
# Aliyun mirrors (much faster inside mainland China).
# NOTE(review): the first mirror (mirrorOf="*") matches every repository, so
# the later mirrors with mirrorOf="repo"/"repo1"/.../"repo5" — ids no
# repository here declares — appear to be inert; confirm before relying on them.
# The XML contains no '$', so the unquoted heredoc delimiter is safe.
cat > /usr/local/apache-maven-3.6.3/conf/settings.xml <<-EOF
<?xml version="1.0" encoding="UTF-8"?>
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
    <localRepository>/root/.m2/repository</localRepository>
    <pluginGroups>
    </pluginGroups>
    <proxies>
    </proxies>
    <servers>
    </servers>
    <mirrors>
        <mirror>
            <id>aliyunmaven</id>
            <mirrorOf>*</mirrorOf>
            <name>阿里云公共仓库</name>
            <url>https://maven.aliyun.com/repository/public</url>
        </mirror>
        <mirror>
            <id>aliyun-central</id>
            <mirrorOf>central</mirrorOf>
            <name>阿里云central仓库</name>
            <url>https://maven.aliyun.com/repository/central</url>
        </mirror>
        <mirror>
            <id>aliyun-google</id>
            <mirrorOf>repo</mirrorOf>
            <name>阿里云google仓库</name>
            <url>
                https://maven.aliyun.com/repository/google
            </url>
        </mirror>
        <mirror>
            <id>aliyun-gradle</id>
            <mirrorOf>repo1</mirrorOf>
            <name>阿里云gradle仓库</name>
            <url>
                https://maven.aliyun.com/repository/gradle-plugin
            </url>
        </mirror>
        <mirror>
            <id>aliyun-spring</id>
            <mirrorOf>repo2</mirrorOf>
            <name>阿里云spring仓库</name>
            <url>
                https://maven.aliyun.com/repository/spring
            </url>
        </mirror>
        <mirror>
            <id>aliyun-spring-plugin</id>
            <mirrorOf>repo3</mirrorOf>
            <name>阿里云spring-plugin仓库</name>
            <url>
                https://maven.aliyun.com/repository/spring-plugin
            </url>
        </mirror>
        <mirror>
            <id>aliyun-grails-core</id>
            <mirrorOf>repo4</mirrorOf>
            <name>阿里云grails-core仓库</name>
            <url>
                https://maven.aliyun.com/repository/grails-core
            </url>
        </mirror>
        <mirror>
            <id>aliyun-apache-snapshots</id>
            <mirrorOf>repo5</mirrorOf>
            <name>阿里云apache-snapshots仓库</name>
            <url>
                https://maven.aliyun.com/repository/apache-snapshots
            </url>
        </mirror>
    </mirrors>
    <profiles>
    </profiles>
</settings>
EOF
  • 安装protoc 2.5.0
安装编译protobuf
# Build protoc from source — Tez/Hadoop require exactly protobuf 2.5.0.
wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz
tar xf protobuf-2.5.0.tar.gz
cd protobuf-2.5.0     # was missing: configure must run inside the source tree
./configure
make -j 8
make install
ldconfig              # refresh the shared-library cache so libprotoc is found
protoc --version      # expected output: libprotoc 2.5.0
  • 编译
# 方式一: 下载官方的二进制包
wget https://dlcdn.apache.org/tez/0.10.1/apache-tez-0.10.1-bin.tar.gz


# 方式二: 源码编译
下载包
https://github.com/apache/tez/archive/refs/tags/rel/release-0.10.1.tar.gz

在源码目录中的pom.xml中将tez-ui移除,太多坑。。。
<slf4j.version>1.7.25</slf4j.version> 跟hadoop-3.3.1 一致
mvn install -Dhadoop.version=3.3.1 -DskipTests -Dmaven.javadoc.skip=true

编译安装完成后,会在源码目录下的 tez-dist/target/ 中找到编译好的 Tez
有两个版本
	tez-0.10.1-minimal.tar.gz
	tez-0.10.1.tar.gz

上传 Tez 的 jar 包到 HDFS

# Official binary package — built against hadoop 3.1.3, not ideal for 3.3.1.
wget https://dlcdn.apache.org/tez/0.10.1/apache-tez-0.10.1-bin.tar.gz
tar xf apache-tez-0.10.1-bin.tar.gz

mkdir -p ~/tez-0.10.1
# tez.tar.gz lives inside the extracted distribution, not the current directory.
tar xf apache-tez-0.10.1-bin/share/tez.tar.gz -C ~/tez-0.10.1/
hadoop fs -mkdir -p /libs/tez    # -p: parent /libs may not exist yet
hadoop fs -put ~/tez-0.10.1 /libs/tez




# The package we compiled ourselves against hadoop 3.3.1.
mkdir -p tez-0.10.1-hadoop-3.3.1
tar xf tez-0.10.1.tar.gz -C tez-0.10.1-hadoop-3.3.1/
hadoop fs -mkdir -p /libs/tez    # -p: parent /libs may not exist yet
hadoop fs -put tez-0.10.1-hadoop-3.3.1 /libs/tez

配置 Hive

注: 可将配置文件按服务分开 依次为 hivemetastore-site.xml(metastore的配置文件) hiveserver2-site.xml(hiveserver2的配置文件) 和 hive-site.xml(全局配置文件)

这样就可以在每一个集群主机都包含这些文件

# Add tez-site.xml to Hive's conf directory (distribute to clients as well).
# The heredoc delimiter MUST be quoted ('EOF'): otherwise bash tries to expand
# ${fs.defaultFS} and aborts with "bad substitution" — the placeholder must
# reach the file literally so Hadoop resolves it at runtime.
cat > tez-site.xml <<-'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>tez.lib.uris</name>
        <value>${fs.defaultFS}/libs/tez/tez-0.10.1-hadoop-3.3.1,${fs.defaultFS}/libs/tez/tez-0.10.1-hadoop-3.3.1/lib</value>
        <description>指定 Hive 运行依赖的 tez 包</description>
    </property>
    <property>
        <name>tez.lib.uris.classpath</name>
        <value>${fs.defaultFS}/libs/tez/tez-0.10.1-hadoop-3.3.1,${fs.defaultFS}/libs/tez/tez-0.10.1-hadoop-3.3.1/lib</value>
        <description>指定 Hadoop 依赖的相关包</description>
    </property>
    <property>
        <name>tez.use.cluster.hadoop-libs</name>
        <value>true</value>
        <description>是否使用 Hadoop 自身的 lib 包</description>
    </property>
    <property>
        <name>tez.am.resource.memory.mb</name>
        <value>4096</value>
        <description>Tez ApplicationMaster 容器内存(MB)</description>
    </property>
    <property>
        <name>tez.am.resource.cpu.vcores</name>
        <value>4</value>
        <description>cpu核数限制</description>
    </property>
</configuration>
EOF

# 如果想之后默认使用tez引擎,到客户端
修改hive-site.xml
<property>
    <name>hive.execution.engine</name>
    <value>tez</value>
</property>
<property>
  <name>hive.tez.container.size</name>
  <value>4096</value>
</property>

部署 Tez 本地程序

[god@node01 opt]$ mkdir /opt/bigdata/tez/tez-0.10.1 -p
[god@node01 opt]$ tar xf tez-0.10.1-minimal.tar.gz -C /opt/bigdata/tez/tez-0.10.1/

[god@node01 opt]$ cd /opt/bigdata/tez
[god@node01 tez]$ ln -s tez-0.10.1 current

Tez 包的引入

cat /opt/bigdata/hive/current/conf/hive-env.sh
export HADOOP_HOME=/opt/bigdata/hadoop/current
export HIVE_CONF_DIR=/opt/bigdata/hive/current/conf
export HIVE_HOME=/opt/bigdata/hive/current
export TEZ_HOME=/opt/bigdata/tez/current

复制tez包到其他机器

# 先将tez整个目录tar打包,用scp传送,然后再解压。这样可以保持软链接
[god@node01 ~]$ cd /opt/bigdata
# 打包
[god@node01 bigdata]$ tar czf tez.gz tez
# 传送
[god@node01 bigdata]$ for i in {2..5}; do scp -p tez.gz node0${i}:`pwd` ;done
# 解包,删除压缩包
[god@node01 bigdata]$ for i in {2..5};do ssh node0${i} "cd /opt/bigdata && tar xf tez.gz && rm -f tez.gz ";done




# 复制hive-env.sh tez-site.xml
[god@node01 tez]$ cd /opt/bigdata/hive/current/conf/  # = cd $HIVE_HOME/conf
[god@node01 conf]$ for i in {2..5};do scp -p hive-env.sh tez-site.xml node0${i}:`pwd` ;done

# On each client host set hive.execution.engine=tez so Tez becomes the default
# engine; also pin the Tez container size and the remote metastore URI.
# Content contains no '$', so the unquoted heredoc delimiter is safe here.
cat > hive-site.xml <<-EOF
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>hive.execution.engine</name>
        <value>tez</value>
    </property>
    <property>
        <name>hive.tez.container.size</name>
        <value>4096</value>
    </property>
    <property>
        <name>hive.metastore.uris</name>
        <value>thrift://node01:9083</value>
        <description>metastore地址</description>
    </property>
</configuration>
EOF
[god@node02 conf]$ for i in {3..5};do scp -p hive-site.xml node0${i}:`pwd` ;done

测试功能

# 准备数据
-- External partitioned table used to exercise the Tez engine.
-- Row layout: id , name , likes (dash-separated list) , address (key:value map),
-- partitioned by age; data files live under /chauncy on HDFS.
create external table my_tez
(
id int,
name string,
likes array<string>,
address map<string,string>
)
partitioned by(age int)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
location '/chauncy';


# Generate 100 sample rows. Note ${i}: the original wrote ${1}, which is the
# script's first positional argument (usually empty), not the loop variable.
for i in {1..100}; do echo "${i},小明${i},抽烟-喝酒-烫头,上海:静安${i}" >> data.txt ;done
hdfs dfs -mkdir -p /chauncy/age=10
hadoop fs -copyFromLocal data.txt /chauncy/age=10


# Start the Metastore service on the Hive master host.
[god@node01 conf]$ nohup hive --service metastore > /opt/bigdata/hive/current/metastore.log 2>&1  &

# Run HiveServer2 in the background on any one of the other hosts.
# Fixed typo: the property is hive.server2.thrift.port (was "prot",
# which HiveServer2 silently ignores).
[god@node02 ~]$ nohup hiveserver2 --hiveconf hive.server2.thrift.port=10000 > /opt/bigdata/hive/current/hiveserver2.log 2>&1 &

# node04连接
beeline -u jdbc:hive2://node02:10000/default -n god
0: jdbc:hive2://node02:10000/default> set hive.execution.engine; # 变成了tez
0: jdbc:hive2://node02:10000/default> msck repair table my_tez; # 修复分区表,hdfs数据加载到表
0: jdbc:hive2://node02:10000/default> select count(*) from my_tez; # 查询数据,快了很多
----------------------------------------------------------------------------------------------
        VERTICES      MODE        STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  KILLED  
----------------------------------------------------------------------------------------------
Map 1 .......... container     SUCCEEDED      1          1        0        0       0       0  
Reducer 2 ...... container     SUCCEEDED      1          1        0        0       0       0  
----------------------------------------------------------------------------------------------
VERTICES: 02/02  [==========================>>] 100%  ELAPSED TIME: 4.46 s     
----------------------------------------------------------------------------------------------
INFO  : Completed executing command(queryId=god_20211024194720_d805887c-5379-4a89-bc9a-0e1b0354492a); Time taken: 9.535 seconds
INFO  : OK
INFO  : Concurrency mode is disabled, not creating a lock manager
+------+
| _c0  |
+------+
| 100  |
+------+
1 row selected (9.737 seconds)

# 切换引擎
0: jdbc:hive2://node02:10000/default> set hive.execution.engine=mr;
No rows affected (0.008 seconds)
0: jdbc:hive2://node02:10000/default> select count(*) from my_tez;

# 加餐: 生产环境提交hive任务的方式
beeline -u jdbc:hive2://node02:10000/data_dev -n god -e "show tables;"
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值