MapReduce的简单代码实现

1 篇文章 0 订阅
1 篇文章 0 订阅

需求:统计某个日志文件中,每个城市访问该网站的独立访客数(同一访客在同一城市只计一次)。

package mapperDemo1;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;


public class MapperDemo extends Configured implements Tool  {

  // Mapper stage.
    /**
     * Splits each input line on tab characters and emits one
     * (field 24, field 5) pair for every line that has at least 36 fields
     * and non-blank values in both of those positions.
     *
     * NOTE(review): given the stated goal of counting visitors per city,
     * field 24 is presumably the city and field 5 the visitor identifier --
     * confirm against the log schema.
     */
    public static class MapperDrv extends Mapper<LongWritable, Text, Text, Text> {
        /** Minimum number of tab-separated fields a line must contain to be processed. */
        private static final int MIN_FIELD_COUNT = 36;
        /** Index of the field written as the map output key. */
        private static final int KEY_FIELD_INDEX = 24;
        /** Index of the field written as the map output value. */
        private static final int VALUE_FIELD_INDEX = 5;

        // Reused across map() calls to avoid allocating two Text objects per record.
        private final Text outPutKey = new Text();
        private final Text outPutValue = new Text();

        /**
         * Processes a single input line.
         *
         * @param key     input key supplied by the framework (not used here)
         * @param value   one line of tab-separated text
         * @param context sink for the emitted (key field, value field) pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // A negative limit keeps trailing empty fields. Without it, String.split
            // discards them, and a record whose last columns are empty would be
            // rejected by the field-count check below even though it is well formed.
            String[] fields = value.toString().split("\t", -1);
            if (fields.length < MIN_FIELD_COUNT) {
                return;
            }

            String keyField = fields[KEY_FIELD_INDEX];
            String valueField = fields[VALUE_FIELD_INDEX];
            // Skip records where either required field is missing or whitespace-only.
            if (StringUtils.isBlank(keyField) || StringUtils.isBlank(valueField)) {
                return;
            }

            outPutKey.set(keyField);
            outPutValue.set(valueField);
            context.write(outPutKey, outPutValue);
        }
    }

  // Reducer stage.
  // Author's notes: the set must hold plain Strings rather than Hadoop types (the author
  // reports errors otherwise), and it must be created inside reduce(). When it was kept as
  // a shared field the job still ran, but the counts were wrong because values from every
  // key accumulated in the same set.
    public static class ReduceDiv extends Reducer<Text, Text, Text, IntWritable> {

        /**
         * Writes, for one key, the number of distinct string values received for it.
         *
         * @param key     the grouping key produced by the mapper
         * @param values  all values emitted for that key
         * @param context sink for the (key, distinct count) pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // A fresh set per call keeps the count scoped to this key only.
            Set<String> distinctValues = new HashSet<String>();
            for (Text current : values) {
                distinctValues.add(current.toString());
            }
            context.write(key, new IntWritable(distinctValues.size()));
        }
    }
    /**
     * Configures the "UVTEST" job and blocks until it finishes.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path;
     *             any further arguments are ignored
     * @return 0 if the job completed successfully, 1 if it failed,
     *         2 if fewer than two arguments were supplied
     * @throws Exception if job setup or execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            System.err.println("Usage: MapperDemo <input path> <output path>");
            return 2;
        }

        // job
        Job job = Job.getInstance(this.getConf(), "UVTEST");
        job.setJarByClass(MapperDemo.class);

        // input
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // mapper
        job.setMapperClass(MapperDrv.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // shuffle: nothing is customised here yet

        // reducer
        job.setReducerClass(ReduceDiv.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);

        // output
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point: runs the job through {@link ToolRunner} and exits
     * the JVM with the job's return code.
     *
     * @param args command-line arguments forwarded to {@link ToolRunner#run}
     */
    public static void main(String[] args) {
        int exitCode;
        try {
            exitCode = ToolRunner.run(new Configuration(), new MapperDemo(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // Report the failure to the caller; previously an exception here still
            // ended the process with status 0, which looked like success.
            exitCode = 1;
        }
        System.exit(exitCode);
    }
}

然后将该项目打包成 jar 包,上传到 Linux 上的 Hadoop 环境,最后用 hadoop jar 命令执行该 jar 包(需依次传入输入路径和输出路径两个参数)。
输出结果:
这里写图片描述

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值