1. hosts
On Linux, map the hostname to the internal (private) IP. On Windows, add an entry to the hosts file mapping the external (public) IP to the hostname:
47.x.x.x  hostname
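A concrete sketch (the private address below and the hostname zs are placeholders; substitute your own):

Linux /etc/hosts:
192.168.1.10   zs

Windows C:\Windows\System32\drivers\etc\hosts:
47.x.x.x       zs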
2. etc/hadoop/slaves
Add the Linux hostname.
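For a single-node setup named zs, the file is just one line:

zs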
3. The four XML configs: just replace the hostname "zs" with your own.
yarn-site.xml
Bind the ResourceManager addresses to 0.0.0.0 so the YARN web UI on port 8088 can be reached from the public network.
When copying this file into IDEA's resources directory, change 0.0.0.0 to the hostname "zs" (i.e., the public IP).
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>zs</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>0.0.0.0:18040</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>0.0.0.0:18030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>0.0.0.0:18025</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>0.0.0.0:18141</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>0.0.0.0:8088</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>file:///home/zzh/data/nm</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>yarn.log.server.url</name>
        <value>http://0.0.0.0:19888/jobhistory/logs/</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.web-proxy.address</name>
        <value>0.0.0.0:20000</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>-1</value>
    </property>
    <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/logs</value>
    </property>
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
</configuration>
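Once copied into the IDEA module's resources directory, these files are picked up from the classpath automatically; a minimal sanity check, assuming the resources folder is on the classpath (YarnConfiguration registers yarn-site.xml as a default resource):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ConfCheck {
    public static void main(String[] args) {
        // Loads core-site.xml and yarn-site.xml from the classpath
        Configuration conf = new YarnConfiguration();
        System.out.println(conf.get("yarn.resourcemanager.hostname"));       // expect: zs
        System.out.println(conf.get("yarn.resourcemanager.webapp.address")); // expect: zs:8088 after the 0.0.0.0 swap
    }
}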
mapred-site.xml
When running from IDEA on Windows, don't forget to add these two properties to mapred-site.xml:
<!-- Local IDEA configuration -->
<property>
    <name>mapreduce.job.jar</name>
    <value>D:\\java源码系列\\hadoop222\\target\\hadoop222-1.0-SNAPSHOT.jar</value>
</property>
<property>
    <name>mapreduce.app-submission.cross-platform</name>
    <value>true</value>
</property>
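Alternatively, the same two settings can be made in code, which keeps the Windows-only jar path out of the shared XML; a sketch of the equivalent conf.set calls (they would go inside run() in the WordCount job below, before Job.getInstance):

// Equivalent to the two mapred-site.xml properties above (IDEA/Windows side only)
conf.set("mapreduce.job.jar", "D:\\java源码系列\\hadoop222\\target\\hadoop222-1.0-SNAPSHOT.jar");
conf.set("mapreduce.app-submission.cross-platform", "true");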
Below is mapred-site.xml on the Linux side:
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>zs:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>zs:19888</value>
    </property>
    <property>
        <name>mapreduce.job.ubertask.enable</name>
        <value>true</value>
    </property>
</configuration>
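The two jobhistory addresses above only respond if the JobHistory server is actually running on the Linux host; on Hadoop 2.x it is started with the bundled script (assuming HADOOP_HOME points at the install directory):

$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver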
core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://zs:9000</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>hadoop.proxyuser.zzh.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.zzh.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>zzh</value>
    </property>
</configuration>
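With fs.defaultFS set this way, client code can open the file system without spelling out the NameNode address; a minimal sketch (the /data/test/wordcount path is the job input used later):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // reads core-site.xml from the classpath
        FileSystem fs = FileSystem.get(conf);     // resolves to hdfs://zs:9000
        for (FileStatus st : fs.listStatus(new Path("/data/test/wordcount/"))) {
            System.out.println(st.getPath());
        }
    }
}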
hdfs-site.xml
<configuration>
    <property>
        <name>dfs.nameservices</name>
        <value>zs-cluster</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.blocksize</name>
        <value>64M</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///home/zzh/data/nn</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///home/zzh/data/dn</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.dir</name>
        <value>file:///home/zzh/data/snn</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.edits.dir</name>
        <value>file:///home/zzh/data/snn</value>
    </property>
    <!-- Newly added configuration -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.web.ugi</name>
        <value>zzh,zzh</value>
    </property>
    <property>
        <name>fs.permissions.umask-mode</name>
        <value>000</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.permissions.superusergroup</name>
        <value>zzh</value>
    </property>
    <property>
        <name>dfs.namenode.safemode.threshold-pct</name>
        <value>0f</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir.restore</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.cluster.administrators</name>
        <value>*</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>zs:9001</value>
    </property>
</configuration>
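Since dfs.webhdfs.enabled is true, the file system is also reachable over plain HTTP, which makes for a quick public-network connectivity test; a sketch assuming the Hadoop 2.x default NameNode web port 50070:

curl "http://zs:50070/webhdfs/v1/data/test/wordcount?op=LISTSTATUS&user.name=zzh"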
IDEA MapReduce code
Compared to running against a local VM, the only addition is the line conf.set("dfs.client.use.datanode.hostname", "true");
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.StringTokenizer;

public class WordCount extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new WordCount(), args));
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = this.getConf();
        // Key setting: make the HDFS client connect to DataNodes by hostname,
        // so the hosts-file mapping to the public IP is used instead of the
        // DataNode's internal address.
        conf.set("dfs.client.use.datanode.hostname", "true");
        Path in = new Path("/data/test/wordcount/");
        Path out = new Path("./word1");
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(this.getClass());
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, in);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, out);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1); // constant count of 1 per word
        private Text word = new Text();                            // reusable word key

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer words = new StringTokenizer(value.toString());
            while (words.hasMoreTokens()) {
                word.set(words.nextToken());
                context.write(word, one);
            }
        }
    }

    static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private Text k3 = new Text();
        private IntWritable v3 = new IntWritable();

        @Override
        protected void reduce(Text k2, Iterable<IntWritable> v2s, Context context)
                throws IOException, InterruptedException {
            this.k3.set(k2.toString());
            int sum = 0;
            for (IntWritable v2 : v2s) {
                sum += v2.get();
            }
            this.v3.set(sum);
            context.write(this.k3, this.v3);
        }
    }
}
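Because out is the relative path ./word1, the output lands under the submitting user's HDFS home directory; assuming the job runs as user zzh, the result can be read with:

hdfs dfs -cat /user/zzh/word1/part-r-00000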