Worked example: using MapReduce to average the odd-numbered and even-numbered lines of an age file

Solution approach

Change the original <k1,v1> of (byte offset, line text) into (line number, line text), so that the input is read one line at a time keyed by its line number, as illustrated below.
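For instance, with a hypothetical two-line input file containing 23 and 45, the default TextInputFormat emits byte offsets as keys, while the custom format built below emits line numbers:

(0, "23")  -->  (1, "23")
(3, "45")  -->  (2, "45")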

  1. Rewrite the TextInputFormat class
    Build a LineNumInputFormat class (extending FileInputFormat). Purpose: create the line-number record reader and mark the input as non-splittable.
    Override two methods:
    –>createRecordReader()
    return new LineNumRecordReader();
    –>isSplitable()
    return false; //reading by line number, so the file must not be split
  2. Rewrite the LineRecordReader class
    Build a LineNumRecordReader class (extending RecordReader). Purpose: turn each key/value pair into (line number, line text).
    a. initialize()
     {
      assign start, end, pos, and in
     }
    b. nextKeyValue()
     {
      read one line; put the line number into key and the line text into value
     }
    c. getCurrentKey()
     {
      return key; //return the current key
     }
    d. getCurrentValue()
     {
      return value; //return the current value
     }
    e. getProgress()
     return 0; //always report zero progress
    f. close()
     in.close(); //close the stream
  3. Write the Mapper class
    Test whether the line number is odd or even, and emit the line value under the matching key
  4. Write the Reducer class
    Compute each group's average and write it out
  5. Write the Job (driver) class
Source code

LineNumInputFormat.class
package com.dragon.hadoop.mr.age;
import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
/*
 * Input format that keys each record by its line number
 */
public class LineNumInputFormat extends FileInputFormat<LongWritable, Text>{

	@Override
	public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
			throws IOException, InterruptedException {
		
		return new LineNumRecordReader(); 
	}	
	
	@Override
	protected boolean isSplitable(JobContext context, Path filename) {
		
		return false;//mark the input as non-splittable
	}
	
}
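Returning false from isSplitable() is essential to this design: if the file were split, each split's record reader would restart its line counter at 1, and parity would no longer match the file's real line numbers. The trade-off is that each input file is processed by a single map task.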

LineNumRecordReader.class
package com.dragon.hadoop.mr.age;

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.LineReader;

/*
 * Record reader that keys each line by its line number
 */
public class LineNumRecordReader extends RecordReader<LongWritable, Text>{
	
	  private long start;
	  private long pos;//formerly the byte offset, here used as the line number
	  private long end;
	  private LineReader in;
	  private FSDataInputStream fileIn;
	  private LongWritable key;
	  private Text value;
	

	@Override
	public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
		 	FileSplit _split = (FileSplit) split; //the split for this file
		 	Path file=_split.getPath();//the input file
		 	FileSystem fs=file.getFileSystem(context.getConfiguration());//get the (distributed) file system
		    fileIn =fs.open(file);//open the file and get an input stream
		    start = _split.getStart();
		    end = start + _split.getLength();
		    fileIn.seek(start);//seek to the start of the split before reading
		    in = new LineReader(fileIn);//wrap the stream in a LineReader
		    pos=1;//line numbers start at 1
	}

	@Override
	public boolean nextKeyValue() throws IOException, InterruptedException {
		if (key == null) {
			key = new LongWritable();
		}
		key.set(pos);//the key is the current line number
		
		if (value == null) {
			value = new Text();
		}
		//read the next line from the input stream into value
		if (in.readLine(value)==0){
			return false;	//nothing left to read: no next key/value pair
		}
		pos++;				//advance to the next line number
		return true;
	}

	@Override
	public LongWritable getCurrentKey() throws IOException, InterruptedException {
		//return the current key
		return key;
	}

	@Override
	public Text getCurrentValue() throws IOException, InterruptedException {
		//return the current value
		return value;
	}

	@Override
	public float getProgress() throws IOException, InterruptedException {
		//progress is not tracked; always report 0
		return 0;
	}

	@Override
	public void close() throws IOException {
		//close the stream
		in.close();
	}

}
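getProgress() above always reports 0, so the framework shows no map progress. A minimal sketch of a more informative version, assuming fileIn is kept as a field exactly as in the class above:

	@Override
	public float getProgress() throws IOException {
		if (start == end) {
			return 0.0f;
		}
		//fraction of this split's bytes consumed so far
		return Math.min(1.0f, (fileIn.getPos() - start) / (float) (end - start));
	}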

LineNumMapper.class
package com.dragon.hadoop.mr.age;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class LineNumMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable>{
	
	IntWritable i=new IntWritable();
	
	@Override
	protected void map(LongWritable key, Text value,
			Mapper<LongWritable, Text, IntWritable, IntWritable>.Context context)
			throws IOException, InterruptedException {
		
		i.set(Integer.parseInt(value.toString()));//parse the line text as an integer
		if(key.get()%2==0){
			//key 2 stands for even-numbered lines
			context.write(new IntWritable(2), i);
		}else{
			//key 3 stands for odd-numbered lines
			context.write(new IntWritable(3), i);
		}
	}
}
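As written, Integer.parseInt() throws a NumberFormatException on a blank or non-numeric line and fails the task. A hedged variant of the map body, under the assumption that malformed lines should be skipped rather than abort the job:

		String s = value.toString().trim();
		if (s.isEmpty()) {
			return; //skip blank lines
		}
		int age;
		try {
			age = Integer.parseInt(s);
		} catch (NumberFormatException e) {
			return; //skip lines that are not integers
		}
		i.set(age);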

LineNumReducer.class
package com.dragon.hadoop.mr.age;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class LineNumReducer extends Reducer<IntWritable, IntWritable, Text, IntWritable>{
	private Text _key=new Text();
	@Override
	protected void reduce(IntWritable key, Iterable<IntWritable> values,
			Reducer<IntWritable, IntWritable,Text, IntWritable>.Context context)
			throws IOException, InterruptedException {
		int sum=0;
		int i=0;
		for (IntWritable value :values ) {
			sum+=value.get();//accumulate the sum and count of this group
			i++;
		}
		
		if(key.get()==2){
			_key.set("Average of even lines:");
		}else{
			_key.set("Average of odd lines:");
		}
		context.write(_key,new IntWritable(sum/i));
		//alternative: emit just the number, with a NullWritable key
		//context.write(NullWritable.get(),new IntWritable(sum/i));
	}
}
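Note that sum/i is integer division, so the averages are truncated (e.g. (23+24)/2 yields 23). A sketch of a fractional average, under the assumption that fractional output is acceptable; the driver would then need job.setOutputValueClass(DoubleWritable.class) and the Reducer's fourth type parameter would become DoubleWritable:

		//import org.apache.hadoop.io.DoubleWritable;
		context.write(_key, new DoubleWritable(sum / (double) i));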

LineNumJob.class
package com.dragon.hadoop.mr.age;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class LineNumJob {
	public static void main(String[] args) throws ClassNotFoundException, InterruptedException, IOException {
		
		Configuration conf =new Configuration();
		conf.set("mapreduce.framework.name", "local");//run the job in local mode
		Path outfile =new Path("file:///f:/jg");
		FileSystem fs=outfile.getFileSystem(conf);
		//delete the output directory if it already exists
		if(fs.exists(outfile)){
			fs.delete(outfile,true);
		}
		
		try {
			Job job =Job.getInstance(conf);
			//locate the jar by this driver class's fully qualified name
			job.setJarByClass(LineNumJob.class);
			//set the mapper class
			job.setMapperClass(LineNumMapper.class);
			//set the reducer class
			job.setReducerClass(LineNumReducer.class);
			//set the job name
			job.setJobName("LineNumJob");
			
			//key/value types of the map output
			job.setMapOutputKeyClass(IntWritable.class);
			job.setMapOutputValueClass(IntWritable.class);
			
			//key/value types of the final (reduce) output
			job.setOutputKeyClass(Text.class);
			job.setOutputValueClass(IntWritable.class);
			
			//use the custom line-number input format
			job.setInputFormatClass(LineNumInputFormat.class);
					
			//input path for the job
			FileInputFormat.setInputPaths(job, new Path("file:///f:/linenum"));
			
			//output path for the job
			FileOutputFormat.setOutputPath(job,outfile);
			System.exit(job.waitForCompletion(true) ? 0 : 1);
		} catch (IOException e) {
			e.printStackTrace();
		}
		
	}

}
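To make the data flow concrete: suppose the input directory file:///f:/linenum holds one file with the hypothetical lines 20, 30, 40, 50. Lines 1 and 3 (odd) average to 30, and lines 2 and 4 (even) average to 40, so the output file part-r-00000 under file:///f:/jg would read:

Average of even lines:	40
Average of odd lines:	30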
