1.准备一份写好的mapreduce代码
2.本地添加yarn-site.xml
将下面配置中的 hadoop001 改为你集群中 ResourceManager 所在节点的 hostname。
<?xml version="1.0" encoding="UTF-8"?>
<!-- Client-side YARN settings used when submitting a job from a local machine to the cluster. -->
<configuration>

  <!-- Hostname of the ResourceManager; replace hadoop001 with your cluster's hostname. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop001</value>
  </property>

  <!-- Auxiliary service name configured for the NodeManagers (the MapReduce shuffle service). -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

</configuration>
3.本地添加mapred-site.xml
将下面配置中的 hadoop001 改为你集群中 JobHistory Server 所在节点的 hostname。
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Client-side MapReduce settings for submitting a job from a local machine to a remote YARN cluster. -->
<configuration>

  <!-- Run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- JobHistory server endpoints; replace hadoop001 with your cluster's hostname. -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop001:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop001:19888</value>
  </property>

  <!--
    Declares the remote cluster's operating system as Linux.
    NOTE(review): this key does not appear in the stock mapred-default.xml and is
    presumably only honored by patched builds; confirm whether it is still needed.
  -->
  <property>
    <name>mapred.remote.os</name>
    <value>Linux</value>
  </property>

  <!--
    Required when the submitting client runs on Windows.
    Fixes the "/bin/bash: line 0: fg: no job control" error.
  -->
  <property>
    <name>mapreduce.app-submission.cross-platform</name>
    <value>true</value>
  </property>

</configuration>
4.修改job.setJarByClass为job.setJar
设置此项可解决Class WordCount$XXXMapper not found错误。
// Replaced by the explicit setJar call below: setJarByClass led to the
// "Class WordCount$XXXMapper not found" error in this setup.
// job.setJarByClass(WordCountApp.class);
// Path to the jar built beforehand with `mvn package`; a wrong path also fails with ClassNotFound.
job.setJar("target/hadoop-learn-1.0-SNAPSHOT.jar");
先通过 mvn package 将项目打包成 jar,再把生成的 jar 的路径填写到这里。如果路径错误,或者仍然使用 setJarByClass,会报 ClassNotFound 错误。