Tez compile, deploy and test

1 篇文章 0 订阅

Compile

diff --git a/pom.xml b/pom.xml
index c8d372a22..e91378789 100644
--- a/pom.xml
+++ b/pom.xml
@@ -101,6 +101,7 @@

   <dependencyManagement>
     <dependencies>
+
       <dependency>
         <groupId>org.apache.tez</groupId>
         <artifactId>hadoop-shim</artifactId>
@@ -1012,6 +1013,51 @@

   <profiles>
     <profile>
+      <id>cdh5.5.1</id>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <properties>
+        <hadoop.version>2.6.0-cdh5.5.1</hadoop.version>
+        <pig.version>0.12.0-cdh5.5.1</pig.version>
+        <jackson.version>1.9.13</jackson.version>
+      </properties>
+      <pluginRepositories>
+        <pluginRepository>
+          <id>cloudera</id>
+          <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
+        </pluginRepository>
+      </pluginRepositories>
+      <repositories>
+        <repository>
+          <id>cloudera</id>
+          <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
+        </repository>
+      </repositories>
+      <dependencies>
+        <dependency>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-mapper-asl</artifactId>
+          <version>${jackson.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-core-asl</artifactId>
+          <version>${jackson.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-jaxrs</artifactId>
+          <version>${jackson.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-xc</artifactId>
+          <version>${jackson.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
       <id>sign</id>
       <build>
         <plugins>
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapreduce/JobContextImpl.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapreduce/JobContextImpl.java
index 1a0277c5d..20de83f8b 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapreduce/JobContextImpl.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapreduce/JobContextImpl.java
@@ -467,5 +467,15 @@ public class JobContextImpl implements JobContext {
   public Progressable getProgressible() {
     return progress;
   }
+
+  /**
+   * Get the boolean value for the property that specifies which classpath
+   * takes precedence when tasks are launched. True - user's classes takes
+   * precedence. False - system's classes takes precedence.
+   * @return true if user's classes should take precedence
+   */
+  public boolean userClassesTakesPrecedence() {
+    return conf.userClassesTakesPrecedence();
+  }

 }
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
index e8abe67a4..7212b3c38 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
@@ -94,7 +94,7 @@ public class ShuffleVertexManager extends VertexManagerPlugin {
    */
   public static final String TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION = 
                                     "tez.shuffle-vertex-manager.min-src-fraction";
-  public static final float TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT = 0.25f;
+  public static final float TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT = 1f;

   /**
    * In case of a ScatterGather connection, once this fraction of source tasks
@@ -105,7 +105,7 @@ public class ShuffleVertexManager extends VertexManagerPlugin {
    */
   public static final String TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION = 
                                       "tez.shuffle-vertex-manager.max-src-fraction";
-  public static final float TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT = 0.75f;
+  public static final float TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT = 1f;

   /**
    * Enables automatic parallelism determination for the vertex. Based on input data
diff --git a/tez-ui2/pom.xml b/tez-ui2/pom.xml
index f1b24cb2d..23a82ae0f 100644
--- a/tez-ui2/pom.xml
+++ b/tez-ui2/pom.xml
@@ -29,7 +29,7 @@
   <properties>
     <webappDir>src/main/webapp</webappDir>
     <node.executable>${basedir}/src/main/webapp/node/node</node.executable>
-    <nodeVersion>v0.12.2</nodeVersion>
+    <nodeVersion>v4.8.4</nodeVersion>
     <npmVersion>2.15.3</npmVersion>
     <skipTests>false</skipTests>
   </properties>
@@ -118,6 +118,7 @@
             <configuration>
               <nodeVersion>${nodeVersion}</nodeVersion>
               <npmVersion>${npmVersion}</npmVersion>
+              <downloadRoot>https://npm.taobao.org/mirrors/node/</downloadRoot>
             </configuration>
           </execution>
           <execution>
diff --git a/tez-ui2/src/main/webapp/package.json b/tez-ui2/src/main/webapp/package.json
index 1903c42a0..dc173cb3e 100644
--- a/tez-ui2/src/main/webapp/package.json
+++ b/tez-ui2/src/main/webapp/package.json
@@ -13,7 +13,7 @@
     "start": "TMPDIR=tmp node ./node_modules/ember-cli/bin/ember server",
     "test": "TMPDIR=tmp node ./node_modules/ember-cli/bin/ember test",

-    "build:mvn": "TMPDIR=tmp node/node ./node_modules/ember-cli/bin/ember build -prod",
+    "build:mvn": "TMPDIR=tmp node/node --harmony ./node_modules/ember-cli/bin/ember build -prod",
     "test:mvn": "TMPDIR=tmp node/node ./node_modules/ember-cli/bin/ember test"
   },
   "repository": {
  • compile
    mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true -Phadoop26 -Pcdh5.5.1
  • result

    • tez-x.y.z-minimal.tar.gz : contains tez jars
    • tez-x.y.z.tar.gz : contains tez jars and hadoop jars
    • tez-dist-x.y.z-tests.jar
  • Tips

    • If you hit a compile error while building tez-ui, try upgrading your node, npm and bower versions by updating pom.xml, bower.json and package.json. Older versions of these tools may cause problems.
pom.xml:
    <nodeVersion>v0.10.18</nodeVersion>
    <npmVersion>1.3.8</npmVersion>

package.json:
        "bower": "1.4.1",

Install

Official site
https://tez.apache.org/install.html
hdp documents
http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.1.7/bk_installing_manually_book/content/rpm-chap-tez_configure_tez.html

mkdir tez-0.8.5 && cd tez-0.8.5 && tar -zxvf ../tez-0.8.5.tar.gz
cd .. && mv tez-0.8.5 /usr/lib && ln -s /usr/lib/tez-0.8.5 /usr/lib/tez
su - hdfs -c 'hadoop dfs -put -f /opt/app/tez-0.8.5-minimal.tar.gz /metadata/libs/tez/tez-0.8.5-minimal.tar.gz'
hadoop dfs -ls /metadata/libs/tez
  • Unzip tez-0.7.0.tar.gz.
  • upload tez libs to hdfs
hadoop dfs -rm -r -f /tmp/wankun/jars/tez/
hadoop dfs -mkdir /tmp/wankun/jars/tez/
hadoop dfs -put lib/                                         /tmp/wankun/jars/tez/
hadoop dfs -put tez-api-0.5.4.jar                            /tmp/wankun/jars/tez/
hadoop dfs -put tez-common-0.5.4.jar                         /tmp/wankun/jars/tez/
hadoop dfs -put tez-dag-0.5.4.jar                            /tmp/wankun/jars/tez/
hadoop dfs -put tez-examples-0.5.4.jar                       /tmp/wankun/jars/tez/
hadoop dfs -put tez-mapreduce-0.5.4.jar                      /tmp/wankun/jars/tez/
hadoop dfs -put tez-mbeans-resource-calculator-0.5.4.jar     /tmp/wankun/jars/tez/
hadoop dfs -put tez-runtime-internals-0.5.4.jar              /tmp/wankun/jars/tez/
hadoop dfs -put tez-runtime-library-0.5.4.jar                /tmp/wankun/jars/tez/
hadoop dfs -put tez-tests-0.5.4.jar                          /tmp/wankun/jars/tez/
hadoop dfs -put tez-yarn-timeline-history-0.5.4.jar          /tmp/wankun/jars/tez/

hadoop dfs -chmod -R 777 /tmp/wankun/jars/tez/
  • set tez environment variable
export TEZ_HOME=/home/wankun/tez
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:${TEZ_HOME}/conf:${TEZ_HOME}/*:${TEZ_HOME}/lib/*
  • set tez-site.xml

Upload the Tez jars and libs to HDFS, then point tez.lib.uris in the configuration file to that HDFS directory.

<!--Fri Apr 25 16:29:38 2014-->
    <configuration>

    <property>
      <name>tez.am.java.opts</name>
      <value>-server -Xmx1535m -Djava.net.preferIPv4Stack=true -XX:+UseNUMA -XX:+UseParallelGC</value>
    </property>

    <property>
      <name>tez.am.env</name>
      <value>LD_LIBRARY_PATH=/var/bh/hadoop/lib/native:/usr/lib/hadoop/lib/native/`$JAVA_HOME/bin/java -d32 -version &amp;&gt; /dev/null;if [ $? -eq 0 ]; then echo Linux-i386-32; else echo Linux-amd64-64;fi`</value>
    </property>

    <property>
      <name>tez.am.shuffle-vertex-manager.max-src-fraction</name>
      <value>0.4</value>
    </property>

    <property>
      <name>tez.task.get-task.sleep.interval-ms.max</name>
      <value>200</value>
    </property>

    <property>
      <name>tez.staging-dir</name>
      <value>/tmp/${user.name}/staging</value>
    </property>

    <property>
      <name>tez.am.grouping.min-size</name>
      <value>16777216</value>
    </property>

    <property>
      <name>tez.runtime.intermediate-input.compress.codec</name>
      <value>org.apache.hadoop.io.compress.SnappyCodec</value>
    </property>

    <property>
      <name>tez.am.container.reuse.enabled</name>
      <value>true</value>
    </property>

    <property>
      <name>tez.yarn.ats.enabled</name>
      <value>true</value>
    </property>

    <property>
      <name>tez.am.log.level</name>
      <value>INFO</value>
    </property>

    <property>
      <name>tez.session.am.dag.submit.timeout.secs</name>
      <value>300</value>
    </property>

    <property>
      <name>tez.am.grouping.split-waves</name>
      <value>1.4</value>
    </property>

    <property>
      <name>tez.session.client.timeout.secs</name>
      <value>180</value>
    </property>

    <property>
      <name>tez.runtime.intermediate-output.compress.codec</name>
      <value>org.apache.hadoop.io.compress.SnappyCodec</value>
    </property>

    <property>
      <name>tez.am.shuffle-vertex-manager.min-src-fraction</name>
      <value>0.2</value>
    </property>

    <property>
      <name>tez.runtime.intermediate-output.should-compress</name>
      <value>true</value>
    </property>

    <property>
      <name>tez.am.am-rm.heartbeat.interval-ms.max</name>
      <value>250</value>
    </property>

    <property>
      <name>tez.lib.uris</name>
      <value>hdfs:///bh/warehouse/dmp/jars/tez/,hdfs:///bh/warehouse/dmp/jars/tez/lib/</value>
    </property>

    <property>
      <name>tez.am.container.reuse.non-local-fallback.enabled</name>
      <value>true</value>
    </property>

    <property>
      <name>tez.am.container.reuse.rack-fallback.enabled</name>
      <value>true</value>
    </property>

    <property>
      <name>tez.am.grouping.max-size</name>
      <value>1073741824</value>
    </property>

    <property>
      <name>tez.am.container.reuse.locality.delay-allocation-millis</name>
      <value>250</value>
    </property>

    <property>
      <name>tez.runtime.intermediate-input.is-compressed</name>
      <value>true</value>
    </property>

    <property>
      <name>tez.am.resource.memory.mb</name>
      <value>2048</value>
    </property>

    <property>
      <name>tez.am.container.session.delay-allocation-millis</name>
      <value>30000</value>
    </property>

  </configuration>

Test

MapReduce

  • Put a test file to hdfs system.
hadoop dfs -rm -r -f /bh/warehouse/dmp/tmp/output/

hadoop jar tez-mapreduce-examples-0.4.1-incubating.jar orderedwordcount /bh/warehouse/dmp/tmp/input/47675.log /bh/warehouse/dmp/tmp/output/
  • You cannot set the queue name with -Dmapreduce.job.queuename=dmp_job on the command line. You must set it with tez.queue.name in tez-site.xml.

Hive

Because I don’t have permissions on the production environment, the test deployment on Hive failed.

The deployment process is recorded here for reference.

  • Way 1
    Set mapreduce.framework.name=yarn-tez in mapred-site.xml. Not recommended.

  • Way 2
    Copy tez-site.xml and all Tez libs to the Hive install directory. Then run set hive.execution.engine=tez; to enable Tez.

  • Tips
    Our Hadoop version is 2.5.0-cdh5.2.0 and our Hive version is 0.13.0.
    tez-0.7.0 is too new and conflicts with this Hadoop version.
    tez-0.4.1-incubating-full is too old and does not even produce application logs.
    tez-0.5.4 produces few application logs and conflicts with Hive 0.13.0.
    This is maddening. It seems that Cloudera does not want to support Tez (according to Cloudera blog posts from August 2014).

Tips

Check out hadoop source branch

git branch -va // view remote branches
git checkout remotes/origin/branch-2.5.2
./dev-support/create-release.sh

参考资料

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值