MapReduce的map和reducer的几种写法模板以及自定义数据类型

模板1

        最基本的MapReduce程序的写法

流程:

        将数据从本地文件导入,经过MapReduce数据分析,将分析结果存储到HDFS

案例代码

导入依赖


<dependencies>
        <!--hadoop的通用模块的依赖坐标-->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!--hadoop的对HDFS分布式文件系统访问的技术支持的依赖坐标-->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!--hadoop的客户端访问的依赖坐标-->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

Mapper代码

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
 

/**
 * Template Mapper skeleton: input KV is (LongWritable, Text) — the byte
 * offset and one line of the input file; output KV is (Text, IntWritable),
 * matching TestReducer's input types.
 */
public class TestMapper extends Mapper <LongWritable, Text,Text, IntWritable>{
    
	
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        
		// business logic goes here...
    }
}

Reducer代码

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
 

/**
 * Template Reducer skeleton: input KV (Text, IntWritable) matches the
 * mapper's output; emits (Text, IntWritable) to the job's OutputFormat.
 */
public class TestReducer extends Reducer <Text, IntWritable,Text,IntWritable>{
    
	
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        
		// business logic goes here...
    }
}

Tool代码

import com.cw.ct.analysis.mapper.TestMapper;
import com.cw.ct.analysis.reducer.TestReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;


/**
 *  数据分析的工具类
 */
public class TestTool implements Tool {

    private Configuration configuration = null;

    public int run(String[] strings) throws Exception {

		// 初始化job任务
        Job job = Job.getInstance();
        job.setJarByClass(TestTool.class);

        
		//设置运行哪个map Task
        job.setMapperClass(TestMapper.class);
 
        //设置运行哪个reduce Task
        job.setReducerClass(TestReducer.class);
 
        //设置map Task的输出的(key,value)的数据类型
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
 
        //设置reduce Task的输出的(key,value)的数据类型
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
 
        //指定要处理的数据所在的位置
        FileInputFormat.setInputPaths(job,"hdfs://192.168.91.101:8020/wordcount/input/big.txt");
        //指定处理之后的结果数据保存位置
        FileOutputFormat.setOutputPath(job,new Path("hdfs://192.168.91.101:8020/wordcount/output"));
		
        boolean result = job.waitForCompletion(true);
        if (result){
            return JobStatus.State.SUCCEEDED.getValue();
        }else{
            return JobStatus.State.FAILED.getValue();
        }
    }

    public void setConf(Configuration configuration) {
        this.configuration = configuration

    }

    public Configuration getConf() {
        return this.configuration;
    }
}

启动类

public class Bootstrap {
    public static void main(String[] args) throws Exception {

        // Option 1: let ToolRunner supply the Configuration.
        // ToolRunner checks whether the tool's Configuration is null and
        // automatically creates one if so.
        int result = ToolRunner.run(new TestTool(), args);

        // Option 2 (equivalent, shown for reference — the original declared
        // a second "int result" here, which does not compile):
        // new Configuration() auto-loads the configuration files present in
        // the runtime environment (core-site.xml, hbase-site.xml, ...).
        // int result = ToolRunner.run(new Configuration(), new TestTool(), args);

        if (result == JobStatus.State.SUCCEEDED.getValue()){
            System.out.println("运行成功!");
            System.exit(0);
        }else{
            System.out.println("运行失败!");
            System.exit(1);
        }
    }
}

模板2

        Hbase集成MapReducer

流程

        从HDFS中读取文件数据,将数据存储到Hbase

案例代码

导入依赖

由于HBase的依赖包中已经集成了hadoop等jar包,因此只需导入HBase包即可

<dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.3.1</version>
</dependency>

Mapper代码

/**
 * Template 2 Mapper skeleton: reads (offset, line) pairs from HDFS text
 * input; output KV is (LongWritable, Text), matching the TableReducer below.
 */
public class TestMapper extends Mapper<LongWritable,Text,LongWritable, Text> {


    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // business logic goes here...
    }
}

Reducer代码

/**
* 继承TableReducer类,实现与HBase打交道
*
*
*/
/**
 * Extends TableReducer so the reduce output is written directly into HBase.
 * The output key is discarded (NullWritable); each emitted value is a Put.
 */
public class TestReducer extends TableReducer<LongWritable, Text, NullWritable> {

    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

        // Sample code, for reference only
        for (Text line : values) {

            // Split the incoming line into tab-separated fields
            String[] columns = line.toString().split("\t");

            // The first field is the rowKey
            Put row = new Put(Bytes.toBytes(columns[0]));

            // Populate the info:name and info:color cells
            row.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(columns[1]));
            row.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(columns[2]));

            context.write(NullWritable.get(), row);
        }
    }
}

tool代码

/**
 * Template 2 tool: reads text files from HDFS and loads them into an
 * HBase table through a TableReducer.
 */
public class TestTool implements Tool {

    // Injected by ToolRunner through setConf
    private Configuration configuration = null;

    /**
     * Runs the HDFS -> HBase import job.
     *
     * @param args args[0] = HDFS input path, args[1] = target HBase table name
     * @return 0 on success, 1 on failure
     */
    public int run(String[] args) throws Exception {

        // Fail fast with a clear usage message instead of an
        // ArrayIndexOutOfBoundsException deeper in the setup below.
        if (args.length < 2) {
            throw new IllegalArgumentException("usage: <input path> <hbase table name>");
        }

        //1. Create the Job object
        Job job = Job.getInstance(configuration);

        //2. Set the driver class
        job.setJarByClass(TestTool.class);

        //3. Mapper and its output KV types
        job.setMapperClass(TestMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        //4. Reducer: initTableReducerJob binds the TableReducer to the target table
        TableMapReduceUtil.initTableReducerJob(args[1],TestReducer.class,job);

        //5. Input path
        FileInputFormat.setInputPaths(job,new Path(args[0]));

        //6. Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);

        return result?0:1;
    }

    public void setConf(Configuration configuration) {
        this.configuration=configuration;
    }

    public Configuration getConf() {
        return configuration;
    }
}

启动类

public class Bootstrap {
    public static void main(String[] args) throws Exception {

        int run = ToolRunner.run(cnew TestTool(), args);
        System.exit(run);

    }
}

模板3

流程

        从HBase中读取数据,再将数据存储到HBase

案例代码

Mapper代码

public class TestMapper extends TableMapper<ImmutableBytesWritable, Put> {

    /**
     *  同一个rowKey的属于一个Map任务,也就是按照rowKey划分Map任务
     * @param key
     * @param value
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {

		// 以下为示范代码
        //构建Put对象
        Put put = new Put(key.get());

        //获取数据
        for (Cell cell:value.rawCells()){

            //给Put对象赋值
                put.add(cell);
        }

        //写出
        context.write(key,put);
    }
}

Reducer代码

/**
 * Template 3 Reducer: forwards every incoming Put straight into the target
 * HBase table; the NullWritable output key carries no data.
 */
public class TestReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Pass-through: write each Put for this rowKey unchanged.
        for (Put row : values) {
            context.write(NullWritable.get(), row);
        }
    }
}

tool代码

/**
 * Template 3 tool: copies data from one HBase table (fruit1) into
 * another (fruit2) via TableMapper/TableReducer.
 */
public class TestTool implements Tool {

    // Injected by ToolRunner through setConf
    private Configuration configuration = null;

    public int run(String[] args) throws Exception {

        Job job = Job.getInstance(configuration);
        // Fixed: the original referenced FruitDriver2.class, a class that
        // does not exist in this template.
        job.setJarByClass(TestTool.class);

        // Mapper: "fruit1" is the source HBase table; new Scan() reads it in full
        TableMapReduceUtil.initTableMapperJob("fruit1",new Scan(),TestMapper.class,ImmutableBytesWritable.class,Put.class,job);

        // Reducer: "fruit2" is the destination HBase table
        TableMapReduceUtil.initTableReducerJob("fruit2",TestReducer.class,job);

        boolean result = job.waitForCompletion(true);

        return result ? 0:1;
    }

    public void setConf(Configuration configuration) {
        this.configuration=configuration;
    }

    public Configuration getConf() {
        return configuration;
    }
}

启动类

public class Bootstrap{

     public static void main(String[] args) throws Exception {

        // Fixed: the original referenced an undeclared "configuration"
        // variable and a FruitDriver2 class; pass an explicit Configuration
        // and the TestTool defined above.
        int run = ToolRunner.run(new Configuration(), new TestTool(), args);
        System.exit(run);
    }
}

模板4

        将MapReducer与Mysql和HBase集成

流程

        MapReducer从HBase中读取数据,然后数据分析,将分析结果保存到Mysql

案例代码

Mapper代码

/**
 * Template 4 Mapper skeleton: TableMapper reads rows from HBase; output KV
 * is (Text, Text), matching the plain Reducer below.
 */
public class TestMapper extends TableMapper<Text, Text> {

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
		// business logic goes here...
    }
}

Reducer代码

/**
 * Template 4 Reducer skeleton: plain Reducer whose (Text, Text) output is
 * handed to the custom MySQL OutputFormat configured on the job.
 */
public class TestReducer extends Reducer<Text, Text,Text,Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

        // business logic goes here...
    }
}

 自定义输出格式

/**
 *  MySQL的数据格式化输入对象
 */
/**
 *  Custom OutputFormat that writes reduce output to MySQL.
 */
public class MySqlTestOutputFormat extends OutputFormat<Text, Text> {

    private FileOutputCommitter committer = null;

    // RecordWriter implemented as a static nested class (no hidden
    // reference to the enclosing OutputFormat instance)
    protected static class MySQLRecordWrite extends RecordWriter<Text,Text>{

		public MySQLRecordWrite() {
            
        }

        /**
         *  Writes one (key, value) pair to MySQL.
         * @param text
         * @param text2
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        public void write(Text text, Text text2) throws IOException, InterruptedException {
            // implement storing the data into MySQL here
        }

        /**
         *  Releases resources (e.g. the JDBC connection).
         * @param taskAttemptContext
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            
        }
    }
	
	/**
	* Returns the custom nested RecordWriter defined above.
	*/
    @Override
    public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        return new MySQLRecordWrite();
    }

    @Override
    public void checkOutputSpecs(JobContext jobContext) throws IOException, InterruptedException {

    }

	// The methods below can be copied as-is
    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {

        // Lazily create a FileOutputCommitter rooted at the job's output dir
        if (this.committer == null) {
            Path output = getOutputPath(taskAttemptContext);
            this.committer = new FileOutputCommitter(output,taskAttemptContext);
        }

        return this.committer;
    }

    private static Path getOutputPath(JobContext job) {

        // FileOutputFormat.OUTDIR is the configured output directory key
        String name = job.getConfiguration().get(FileOutputFormat.OUTDIR);
        return name == null ? null: new Path(name);
    }
}

tool代码

/**
 *  数据分析的工具类
 */
public class TestTool implements Tool {

    private Configuration configuration = null;

    public int run(String[] strings) throws Exception {

        Job job = Job.getInstance();
        job.setJarByClass(AnalysisTextTool.class);

        // 设置mapper
        TableMapReduceUtil.initTableMapperJob(
                Names.TABLE.getValue(),
                new Scan(),
                TestMapper.class,
                Text.class,
                Text.class,
                job
        );

        // 设置reducer
        job.setReducerClass(TestReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
		
		// 设置自定义的输出格式
        // outputformat
        job.setOutputFormatClass(MySqlTextOutputFormat.class);

        boolean result = job.waitForCompletion(true);
        return result ? 0:1;
    }

    public void setConf(Configuration configuration) {
        this.configuration = configuration

    }

    public Configuration getConf() {
        return this.configuration;
    }
}

启动类

public class Bootstrap{

     public static void main(String[] args) throws Exception {
        // Delegate to ToolRunner (which injects the Configuration and
        // forwards generic options) and exit with the tool's return code.
        int exitCode = ToolRunner.run(new TestTool(), args);
        System.exit(exitCode);
    }
}

自定义数据类型

         有时,在MapReducer中的数据类型是无法满足我们的需求的,因此我们要定义一些自定义数据对象

模板

自定义Key类型

/**
 *  自定义数据分析Key
 *  Writable:是基本数据类接口
 *  Comparable:key比较接口
 *
 */
/**
 *  Custom analysis key of (tel, date).
 *  Writable: Hadoop's serialization interface.
 *  Comparable: ordering used in the shuffle/sort phase.
 *
 *  Note: tel and date are assumed non-null (compareTo already relied on
 *  that); construct keys with both fields set before use.
 */
public class TestKey implements WritableComparable<TestKey> {


    private String tel;
    private String date;


    public TestKey() {
    }

    public TestKey(String tel, String date) {
        this.tel = tel;
        this.date = date;
    }

    public String getTel() {
        return tel;
    }

    public void setTel(String tel) {
        this.tel = tel;
    }

    public String getDate() {
        return date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    /**
     *  Orders by tel, then by date; keys that compare equal are grouped
     *  into the same reduce call.
     * @param key the other key
     * @return 0 when tel and date both match
     */
    public int compareTo(TestKey key) {

        int result = tel.compareTo(key.getTel());
        // tel equal -> fall back to date
        if (result == 0){
            result = date.compareTo(key.getDate());
        }
        // 0 means "same key"
        return result;
    }

    /**
     *  Serializes the key; required by Writable.
     * @param dataOutput
     * @throws IOException
     */
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(tel);
        dataOutput.writeUTF(date);

    }

    /**
     *  Deserializes the key; field order must mirror write().
     * @param dataInput
     * @throws IOException
     */
    public void readFields(DataInput dataInput) throws IOException {
        tel = dataInput.readUTF();
        date = dataInput.readUTF();

    }

    /**
     *  Added: equals/hashCode consistent with compareTo. The default
     *  HashPartitioner partitions by hashCode(), so without these overrides
     *  equal keys (deserialized as distinct objects) could be sent to
     *  different partitions.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof TestKey)) {
            return false;
        }
        TestKey other = (TestKey) o;
        return tel.equals(other.tel) && date.equals(other.date);
    }

    @Override
    public int hashCode() {
        return 31 * tel.hashCode() + date.hashCode();
    }
}

自定义Value类型

/**
 *  自定义Value
 *  只需实现Writable即可,因为value不用比较
 */
/**
 *  Custom Value type.
 *  Only Writable is needed: values are never compared during the sort.
 */
public class TestValue implements Writable {


    private String sumCall;
    private String sumDuration;

    public TestValue() {

    }

    public TestValue(String sumCall, String sumDuration) {
        this.sumCall = sumCall;
        this.sumDuration = sumDuration;
    }

    public String getSumCall() {
        return sumCall;
    }

    public void setSumCall(String sumCall) {
        this.sumCall = sumCall;
    }

    public String getSumDuration() {
        return sumDuration;
    }

    public void setSumDuration(String sumDuration) {
        this.sumDuration = sumDuration;
    }

	/**
     *  Serializes the value; required by Writable.
     * @param dataOutput
     * @throws IOException
     */
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(sumCall);
        dataOutput.writeUTF(sumDuration);

    }
	
	/**
     *  Deserializes the value; field order must mirror write().
     * @param dataInput
     * @throws IOException
     */
    public void readFields(DataInput dataInput) throws IOException {
        sumCall = dataInput.readUTF();
        sumDuration = dataInput.readUTF();
    }
}

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值