App class (job driver):
package com.mao.hdfs.chain;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Chained job: two mappers run before the shuffle (ChainMapper) and one
 * extra mapper runs after the reducer (ChainReducer).
 */
public class WCChainApp {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");               // run against the local file system
        Job job = Job.getInstance(conf);

        // basic job properties
        job.setJobName("WCChainApp");                       // job name
        job.setJarByClass(WCChainApp.class);                // class used to locate the job jar
        job.setInputFormatClass(TextInputFormat.class);     // input format

        // input path
        FileInputFormat.addInputPath(job, new Path("d:/mr/skew"));
        // output path
        FileOutputFormat.setOutputPath(job, new Path("d:/mr/skew/out"));

        // add Mapper1 to the map-side chain (tokenizes each line)
        ChainMapper.addMapper(job, WCMapMapper1.class, LongWritable.class, Text.class, Text.class, IntWritable.class, conf);
        // add Mapper2 to the map-side chain (drops sensitive words)
        ChainMapper.addMapper(job, WCMapMapper2.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);
        // set the reducer of the reduce-side chain
        ChainReducer.setReducer(job, WCReducer.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);
        // add WCReduceMapper1 after the reducer (filters by word count)
        ChainReducer.addMapper(job, WCReduceMapper1.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);

        job.setNumReduceTasks(3);                           // number of reduce tasks
        job.waitForCompletion(true);
    }
}
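Each addMapper / setReducer call takes its own Configuration as the last argument, so an individual link in the chain can carry settings that the other links never see; the code above simply reuses the job's conf for every link. A minimal sketch of a per-link configuration, where the property name wc.min.count is a hypothetical choice for illustration and not part of the original code:

// Hedged sketch: give the reduce-side filter mapper its own Configuration.
// "wc.min.count" is a hypothetical property name, not from the original code.
Configuration filterConf = new Configuration(false);
filterConf.setInt("wc.min.count", 5);
ChainReducer.addMapper(job, WCReduceMapper1.class,
        Text.class, IntWritable.class, Text.class, IntWritable.class, filterConf);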
Mapper1 class: reads the input file contents
package com.mao.hdfs.chain;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper1: tokenizes each input line and emits (word, 1) pairs.
 */
public class WCMapMapper1 extends Mapper<LongWritable, Text, Text, IntWritable> {
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        Text keyOut = new Text();
        IntWritable valueOut = new IntWritable();
        String[] arr = value.toString().split(" ");
        for (String s : arr) {
            keyOut.set(s);
            valueOut.set(1);
            context.write(keyOut, valueOut);
        }
    }
}
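Splitting on a single space never breaks on tabs and yields empty tokens when spaces repeat. A hedged one-line alternative for the split call above:

// Hedged alternative: split on any run of whitespace instead of a single space.
String[] arr = value.toString().split("\\s+");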
Mapper2 class: processes the output of Mapper1
package com.mao.hdfs.chain;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper2: drops sensitive words emitted by Mapper1.
 */
public class WCMapMapper2 extends Mapper<Text, IntWritable, Text, IntWritable> {
    protected void map(Text key, IntWritable value, Context context) throws IOException, InterruptedException {
        // pass through every word except the blocked one
        if (!key.toString().equals("falungong")) {
            context.write(key, value);
        }
    }
}
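Hardcoding the blocked word is fine for a demo, but the same filter can read its word list from the Configuration passed to ChainMapper.addMapper in setup(). A hedged sketch of such a variant; the class name ConfigurableFilterMapper and the property wc.blocked.words are assumptions for illustration, not part of the original code:

package com.mao.hdfs.chain;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

// Hedged sketch: blocked words come from the mapper's Configuration
// instead of being hardcoded.
public class ConfigurableFilterMapper extends Mapper<Text, IntWritable, Text, IntWritable> {
    private Set<String> blocked = new HashSet<>();

    protected void setup(Context context) {
        // "wc.blocked.words" is a hypothetical property: a comma-separated word list
        for (String w : context.getConfiguration().get("wc.blocked.words", "").split(",")) {
            if (!w.isEmpty()) {
                blocked.add(w);
            }
        }
    }

    protected void map(Text key, IntWritable value, Context context) throws IOException, InterruptedException {
        if (!blocked.contains(key.toString())) {
            context.write(key, value);
        }
    }
}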
Reducer class:
package com.mao.hdfs.chain;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer: sums the counts for each word.
 */
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int count = 0;
        for (IntWritable iw : values) {
            count = count + iw.get();
        }
        context.write(key, new IntWritable(count));
    }
}
ReduceMapper class: filters the reducer output
package com.mao.hdfs.chain;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Reduce-side mapper: filters the reducer output, keeping only words
 * that appear more than 5 times.
 */
public class WCReduceMapper1 extends Mapper<Text, IntWritable, Text, IntWritable> {
    protected void map(Text key, IntWritable value, Context context) throws IOException, InterruptedException {
        if (value.get() > 5) {
            context.write(key, value);
        }
    }
}
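If the filter mapper were given its own Configuration in the driver (as in the sketch after the App class), the cutoff could be read in setup() instead of being hardcoded at 5. A hedged sketch of the relevant methods of such a variant; wc.min.count is the same hypothetical property name:

// Hedged sketch: read the cutoff from the mapper's Configuration, default 5.
private int minCount;

protected void setup(Context context) {
    minCount = context.getConfiguration().getInt("wc.min.count", 5);
}

protected void map(Text key, IntWritable value, Context context) throws IOException, InterruptedException {
    if (value.get() > minCount) {
        context.write(key, value);
    }
}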