tez 安装
1.##下载maven 工具
wget http://mirrors.shu.edu.cn/apache/maven/binaries/apache-maven-3.1.1-bin.tar.gz
tar -zxvf apache-maven-3.1.1-bin.tar.gz
mv apache-maven-3.1.1 /usr/local/maven
#设置环境变量/etc/profile
export MAVEN_HOME=/usr/local/maven
export PATH=${PATH}:${MAVEN_HOME}/bin
#查看版本号
mvn --version
Apache Maven 3.1.1 (0728685237757ffbf44136acec0402957f723d9a; 2013-09-17 23:22:22+0800)
Maven home: /usr/local/maven
#安装工具
yum install protobuf protobuf-devel -y
#查看hadoop 版本
hadoop version
wget http://mirrors.shu.edu.cn/apache/tez/0.9.1/apache-tez-0.9.1-src.tar.gz
tar -zxvf apache-tez-0.9.1-src.tar.gz
cd apache-tez-0.9.1-src
##修改pom.xml
注释该模块,因为这个模块的依赖在国外,需要翻墙才能下载,用处也不大
<!-- <module>tez-ui</module> -->
##开始编译
mvn clean package -Dhadoop.version=2.7.7 -DskipTests=true -Dmaven.javadoc.skip=true
##编译结果,编译失败注意查看是否缺包
[INFO] Building jar: /root/apache-tez-0.9.1-src/docs/target/tez-docs-0.9.1-tests.jar
[INFO] ------------------------------------------------------------------------
[INFO] Reactor Summary:
[INFO]
[INFO] tez ............................................... SUCCESS [0.981s]
[INFO] hadoop-shim ....................................... SUCCESS [1.436s]
[INFO] tez-api ........................................... SUCCESS [5.370s]
[INFO] tez-common ........................................ SUCCESS [0.650s]
[INFO] tez-runtime-internals ............................. SUCCESS [0.929s]
[INFO] tez-runtime-library ............................... SUCCESS [3.732s]
[INFO] tez-mapreduce ..................................... SUCCESS [1.534s]
[INFO] tez-examples ...................................... SUCCESS [0.249s]
[INFO] tez-dag ........................................... SUCCESS [5.089s]
[INFO] tez-tests ......................................... SUCCESS [0.966s]
[INFO] tez-ext-service-tests ............................. SUCCESS [0.788s]
[INFO] tez-plugins ....................................... SUCCESS [0.028s]
[INFO] tez-yarn-timeline-history ......................... SUCCESS [0.465s]
[INFO] tez-yarn-timeline-history-with-acls ............... SUCCESS [0.293s]
[INFO] tez-history-parser ................................ SUCCESS [59.180s]
[INFO] tez-aux-services .................................. SUCCESS [26.032s]
[INFO] tez-tools ......................................... SUCCESS [0.026s]
[INFO] tez-perf-analyzer ................................. SUCCESS [0.026s]
[INFO] tez-job-analyzer .................................. SUCCESS [5.997s]
[INFO] tez-javadoc-tools ................................. SUCCESS [0.163s]
[INFO] hadoop-shim-impls ................................. SUCCESS [0.024s]
[INFO] hadoop-shim-2.7 ................................... SUCCESS [0.125s]
[INFO] tez-dist .......................................... SUCCESS [11.260s]
[INFO] Tez ............................................... SUCCESS [0.030s]
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 2:05.755s
[INFO] Finished at: Fri Mar 01 10:04:57 CST 2019
[INFO] Final Memory: 108M/1456M
[INFO] ------------------------------------------------------------------------
[root@master apache-tez-0.9.1-src]#
#设置tez-site.xml
cat /usr/local/tez/conf/tez-site.xml
<configuration>
<property>
<name>tez.lib.uris</name>
<value>${fs.defaultFS}/usr/tez/tez.tar.gz</value>
</property>
</configuration>
vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh 增加 tez 的环境变量
##set TEZ PATH
export TEZ_CONF_DIR=/usr/local/tez/conf/
export TEZ_JARS=/usr/local/tez
export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${TEZ_CONF_DIR}:${TEZ_JARS}/*:${TEZ_JARS}/lib/*
###导入hdfs
hadoop fs -mkdir -p /usr/tez
hadoop fs -copyFromLocal /root/apache-tez-0.9.1-src/tez-dist/target/tez-0.9.1.tar.gz /usr/tez/tez.tar.gz
##设置/usr/local/hadoop/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn-tez</value> <!-- 由 yarn 修改成 yarn-tez -->
<!-- <value>yarn</value>-->
</property>
<property>
<name>mapred.job.tracker</name>
<value>http://master.sxw.com:9001</value>
</property>
</configuration>
所有的hadoop节点都需要同步以上配置
##重启集群后
对tez 进行测试
1. 写2个文件上传至hdfs
echo "Hello World Hello Tez" > file01
echo "Hello World Hello sxw com" > file02
hadoop fs -mkdir -p /usr/tez/{input,output}
hadoop fs -put file01 file02 /usr/tez/input
2.测试
[root@master tez]# hadoop jar tez-examples-0.9.1.jar orderedwordcount /usr/tez/input /usr/tez/output
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/tez/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
19/03/01 18:16:31 INFO shim.HadoopShimsLoader: Trying to locate HadoopShimProvider for hadoopVersion=2.7.7, majorVersion=2, minorVersion=7
19/03/01 18:16:31 INFO shim.HadoopShimsLoader: Picked HadoopShim org.apache.tez.hadoop.shim.HadoopShim27, providerName=org.apache.tez.hadoop.shim.HadoopShim25_26_27Provider, overrideProviderViaConfig=null, hadoopVersion=2.7.7, majorVersion=2, minorVersion=7
19/03/01 18:16:31 INFO client.TezClient: Tez Client Version: [ component=tez-api, version=0.9.1, revision=${buildNumber}, SCM-URL=scm:git:https://git-wip-us.apache.org/repos/asf/tez.git, buildTime=20190301-1500 ]
19/03/01 18:16:32 INFO impl.TimelineClientImpl: Timeline service address: http://master.sxw.com:8188/ws/v1/timeline/
19/03/01 18:16:32 INFO client.RMProxy: Connecting to ResourceManager at master.sxw.com/192.168.97.51:8032
19/03/01 18:16:32 INFO client.AHSProxy: Connecting to Application History server at master.sxw.com/192.168.97.51:10201
19/03/01 18:16:33 INFO examples.OrderedWordCount: Running OrderedWordCount
19/03/01 18:16:33 INFO client.TezClient: Submitting DAG application with id: application_1551431016168_0004
19/03/01 18:16:33 INFO client.TezClientUtils: Using tez.lib.uris value from configuration: hdfs://master.sxw.com:9000/usr/tez/tez-0.9.1.tar.gz
19/03/01 18:16:33 INFO client.TezClientUtils: Using tez.lib.uris.classpath value from configuration: null
19/03/01 18:16:33 INFO client.TezClient: Tez system stage directory hdfs://master.sxw.com:9000/tmp/root/tez/staging/.tez/application_1551431016168_0004 doesn't exist and is created
19/03/01 18:16:34 INFO client.TezClient: Submitting DAG to YARN, applicationId=application_1551431016168_0004, dagName=OrderedWordCount, callerContext={ context=TezExamples, callerType=null, callerId=null }
19/03/01 18:16:34 INFO impl.YarnClientImpl: Submitted application application_1551431016168_0004
19/03/01 18:16:34 INFO client.TezClient: The url to track the Tez AM: http://master.sxw.com:8088/proxy/application_1551431016168_0004/
19/03/01 18:16:38 INFO client.DAGClientImpl: DAG initialized: CurrentState=Running
19/03/01 18:16:38 INFO client.DAGClientImpl: DAG: State: RUNNING Progress: 0% TotalTasks: 3 Succeeded: 0 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:38 INFO client.DAGClientImpl: VertexStatus: VertexName: Tokenizer Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:38 INFO client.DAGClientImpl: VertexStatus: VertexName: Summation Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:38 INFO client.DAGClientImpl: VertexStatus: VertexName: Sorter Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:42 INFO client.DAGClientImpl: DAG: State: RUNNING Progress: 33.33% TotalTasks: 3 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:42 INFO client.DAGClientImpl: VertexStatus: VertexName: Tokenizer Progress: 100% TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:42 INFO client.DAGClientImpl: VertexStatus: VertexName: Summation Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:42 INFO client.DAGClientImpl: VertexStatus: VertexName: Sorter Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:42 INFO client.DAGClientImpl: DAG: State: RUNNING Progress: 66.67% TotalTasks: 3 Succeeded: 2 Running: 1 Failed: 0 Killed: 0
19/03/01 18:16:42 INFO client.DAGClientImpl: VertexStatus: VertexName: Tokenizer Progress: 100% TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:42 INFO client.DAGClientImpl: VertexStatus: VertexName: Summation Progress: 100% TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:42 INFO client.DAGClientImpl: VertexStatus: VertexName: Sorter Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 1 Failed: 0 Killed: 0
19/03/01 18:16:43 INFO client.DAGClientImpl: DAG: State: RUNNING Progress: 100% TotalTasks: 3 Succeeded: 3 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:43 INFO client.DAGClientImpl: VertexStatus: VertexName: Tokenizer Progress: 100% TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:43 INFO client.DAGClientImpl: VertexStatus: VertexName: Summation Progress: 100% TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:43 INFO client.DAGClientImpl: VertexStatus: VertexName: Sorter Progress: 100% TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:43 INFO client.DAGClientImpl: DAG: State: SUCCEEDED Progress: 100% TotalTasks: 3 Succeeded: 3 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:43 INFO client.DAGClientImpl: VertexStatus: VertexName: Tokenizer Progress: 100% TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:43 INFO client.DAGClientImpl: VertexStatus: VertexName: Summation Progress: 100% TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:43 INFO client.DAGClientImpl: VertexStatus: VertexName: Sorter Progress: 100% TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0
19/03/01 18:16:43 INFO client.DAGClientImpl: DAG completed. FinalState=SUCCEEDED
[root@master tez]#
##把tez的lib/* 包 copy 到hive/lib/下面,否则报错
cp -rf /usr/local/tez/lib/* /usr/local/hive/lib
hive 中设置引擎
set hive.execution.engine;
set; ##查看 hive.execution.engine=mr;
修改成tez
set hive.execution.engine=tez;
vim /usr/local/hive/conf/hive-site.xml
把
hive.execution.engine=mr; 修改成tez
@导入数据
hive -f ods_zzzzzz.sql
hadoop fs -put ods_fz_ssp_school_org_school/* /user/hive/warehouse/zzzz/zzzzz.sql
#hive 测试
##tomcat 安装
wget http://mirrors.tuna.tsinghua.edu.cn/apache/tomcat/tomcat-8/v8.5.38/bin/apache-tomcat-8.5.38.tar.gz
tar -zxvf apache-tomcat-8.5.38.tar.gz -C /usr/local
mv /usr/local/apache-tomcat-8.5.38 /usr/local/tomcat
mkdir /usr/local/tomcat/webapps/tez-ui/
cd /usr/local/tomcat/webapps/tez-ui/
unzip /usr/local/tez/tez-ui-0.9.1.war
##修改配置
vi /usr/local/tomcat/webapps/tez-ui/config/configs.env
timeline: "http://master.sxw.com:8188",
rm: "http://master.sxw.com:8088",
修改 yarn-site.xml 所有节点都要修改哦
[root@slave1 ~]# vim /usr/local/hadoop/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.acl.enable</name>
<value>0</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master.sxw.com</value>
</property>
<!-- conf timeline server -->
<property>
<name>yarn.timeline-service.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.hostname</name>
<value>master.sxw.com</value>
</property>
<property>
<name>yarn.timeline-service.http-cross-origin.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.generic-application-history.enabled</name>
<value>true</value>
</property>
<property>
<description>Address for the Timeline server to start the RPC server.</description>
<name>yarn.timeline-service.address</name>
<value>master.sxw.com:10201</value>
</property>
<property>
<description>The http address of the Timeline service web application.</description>
<name>yarn.timeline-service.webapp.address</name>
<value>master.sxw.com:8188</value>
</property>
<property>
<description>The https address of the Timeline service web application.</description>
<name>yarn.timeline-service.webapp.https.address</name>
<value>master.sxw.com:2191</value>
</property>
<property>
<name>yarn.timeline-service.handler-thread-count</name>
<value>24</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>5120</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>6000</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>2048</value>
</property>
</configuration>
修改 vim /usr/local/tez/conf/tez-site.xml 所有节点都需要修改
<property>
<name>tez.tez-ui.history-url.base</name>
<value>http://master.sxw.com:8080/tez-ui/</value>
</property>
##reboot hadoop 集群
/usr/local/hadoop/sbin/stop-all.sh
/usr/local/hadoop/sbin/start-all.sh
##启动 timeline server
/usr/local/hadoop/sbin/yarn-daemon.sh start timelineserver