1. hosts
On Linux, map the hostname to the internal (private) IP. On Windows, add an entry to the hosts file mapping the external (public) IP to the hostname:
47.x.x.x  hostname
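A concrete sketch (the private address below and the hostname zs are placeholders; substitute your own):

Linux /etc/hosts:
192.168.1.10   zs

Windows C:\Windows\System32\drivers\etc\hosts:
47.x.x.x       zs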
2. etc/hadoop/slaves
Add the Linux hostname.
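For a single-node setup named zs, the file is just one line:

zs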
3. The four XML configs: just replace the hostname "zs" with your own.
yarn-site.xml
Bind the ResourceManager addresses to 0.0.0.0 so the YARN web UI on port 8088 can be reached from the public network.
When copying this file into IDEA's resources directory, change 0.0.0.0 to the hostname "zs" (i.e., the public IP).
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>zs</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>0.0.0.0:18040</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>0.0.0.0:18030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>0.0.0.0:18025</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>0.0.0.0:18141</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>0.0.0.0:8088</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>file:///home/zzh/data/nm</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>yarn.log.server.url</name>
        <value>http://0.0.0.0:19888/jobhistory/logs/</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.web-proxy.address</name>
        <value>0.0.0.0:20000</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>-1</value>
    </property>
    <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/logs</value>
    </property>
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
</configuration>
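Once copied into the IDEA module's resources directory, these files are picked up from the classpath automatically; a minimal sanity check, assuming the resources folder is on the classpath (YarnConfiguration registers yarn-site.xml as a default resource):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ConfCheck {
    public static void main(String[] args) {
        // Loads core-site.xml and yarn-site.xml from the classpath
        Configuration conf = new YarnConfiguration();
        System.out.println(conf.get("yarn.resourcemanager.hostname"));       // expect: zs
        System.out.println(conf.get("yarn.resourcemanager.webapp.address")); // expect: zs:8088 after the 0.0.0.0 swap
    }
}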
mapred-site.xml
When running from IDEA on Windows, don't forget to add these two properties to mapred-site.xml:
<!-- Local IDEA configuration -->
<property>
    <name>mapreduce.job.jar</name>
    <value>D:\\java源码系列\\hadoop222\\target\\hadoop222-1.0-SNAPSHOT.jar</value>
</property>
<property>
    <name>mapreduce.app-submission.cross-platform</name>
    <value>true</value>
</property>
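Alternatively, the same two settings can be made in code, which keeps the Windows-only jar path out of the shared XML; a sketch of the equivalent conf.set calls (they would go inside run() in the WordCount job below, before Job.getInstance):

// Equivalent to the two mapred-site.xml properties above (IDEA/Windows side only)
conf.set("mapreduce.job.jar", "D:\\java源码系列\\hadoop222\\target\\hadoop222-1.0-SNAPSHOT.jar");
conf.set("mapreduce.app-submission.cross-platform", "true");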
Below is mapred-site.xml on the Linux side:
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>zs:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>zs:19888</value>
    </property>
    <property>
        <name>mapreduce.job.ubertask.enable</name>
        <value>true</value>
    </property>
</configuration>
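The two jobhistory addresses above only respond if the JobHistory server is actually running on the Linux host; on Hadoop 2.x it is started with the bundled script (assuming HADOOP_HOME points at the install directory):

$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver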
core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://zs:9000</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>hadoop.proxyuser.zzh.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.zzh.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>zzh</value>
    </property>
</configuration>
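With fs.defaultFS set this way, client code can open the file system without spelling out the NameNode address; a minimal sketch (the /data/test/wordcount path is the job input used later):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // reads core-site.xml from the classpath
        FileSystem fs = FileSystem.get(conf);     // resolves to hdfs://zs:9000
        for (FileStatus st : fs.listStatus(new Path("/data/test/wordcount/"))) {
            System.out.println(st.getPath());
        }
    }
}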
hdfs-site.xml
<configuration>
    <property>
        <name>dfs.nameservices</name>
        <value>zs-cluster</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.blocksize</name>
        <value>64M</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///home/zzh/data/nn</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///home/zzh/data/dn</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.dir</name>
        <value>file:///home/zzh/data/snn</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.edits.dir</name>
        <value>file:///home/zzh/data/snn</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.web.ugi</name>
        <value>zzh,zzh</value>
    </property>
    <property>
        <name>fs.permissions.umask-mode</name>
        <value>000</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.permissions.superusergroup</name>
        <value>zzh</value>
    </property>
    <property>
        <name>dfs.namenode.safemode.threshold-pct</name>
        <value>0f</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir.restore</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.cluster.administrators</name>
        <value>*</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>zs:9001</value>
    </property>
</configuration>
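Since dfs.webhdfs.enabled is true, the file system is also reachable over plain HTTP, which makes for a quick public-network connectivity test; a sketch assuming the Hadoop 2.x default NameNode web port 50070:

curl "http://zs:50070/webhdfs/v1/data/test/wordcount?op=LISTSTATUS&user.name=zzh"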
IDEA MapReduce code
Compared to running against a local VM, the only addition is the line conf.set("dfs.client.use.datanode.hostname", "true");
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.StringTokenizer;

public class WordCount extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new WordCount(), args));
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = this.getConf();
        // Key setting: make the HDFS client connect to DataNodes by hostname,
        // so the hosts-file mapping to the public IP is used instead of the
        // DataNode's internal address.
        conf.set("dfs.client.use.datanode.hostname", "true");
        Path in = new Path("/data/test/wordcount/");
        Path out = new Path("./word1");
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(this.getClass());
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, in);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, out);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1); // constant count of 1 per word
        private Text word = new Text();                            // reusable word key

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer words = new StringTokenizer(value.toString());
            while (words.hasMoreTokens()) {
                word.set(words.nextToken());
                context.write(word, one);
            }
        }
    }

    static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private Text k3 = new Text();
        private IntWritable v3 = new IntWritable();

        @Override
        protected void reduce(Text k2, Iterable<IntWritable> v2s, Context context)
                throws IOException, InterruptedException {
            this.k3.set(k2.toString());
            int sum = 0;
            for (IntWritable v2 : v2s) {
                sum += v2.get();
            }
            this.v3.set(sum);
            context.write(this.k3, this.v3);
        }
    }
}
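Because out is the relative path ./word1, the output lands under the submitting user's HDFS home directory; assuming the job runs as user zzh, the result can be read with:

hdfs dfs -cat /user/zzh/word1/part-r-00000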