// Hadoop MapReduce — key grouping and sorting example
package com.lius.hadoop.mapReduce;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce example demonstrating key grouping (via a custom Partitioner)
 * and key sorting (via a WritableComparable composite key).
 *
 * @author Administrator
 */
public class parationKeyGroupAndSort {
//创建支持Compareable的Bean
static class Bean implements WritableComparable<Bean>{
private String domain;
private long width;
public Bean() {
}
public Bean(String domain,long width) {
setValue(domain,width);
}
public String getDomain() {
return domain;
}
public long getWidth() {
return width;
}
private void setValue(String domain,long width) {
this.domain = domain;
this.width = width;
}
@Override
public String toString() {
// TODO Auto-generated method stub
return String.format("%s\t%s", this.domain,this.width);
}
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
out.writeUTF(this.domain);
out.writeLong(this.width);
}
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.domain = in.readUTF();
this.width = in.readLong();
}
@Override
public int compareTo(Bean o) {
// TODO Auto-generated method stub
return (int)(this.width-o.width);
}
}
//创建Mapper类
static class KeyMapper extends Mapper<LongWritable, Text, Bean, Text>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Bean, Text>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
String val = value.toString();
String[] v = val.split("\t");
Bean bean = new Bean(v[3],v[0].length());
context.write(bean, new Text(v[1]));
}
}
//创建Reduce处理类
static class keyReduce extends Reducer<Bean, Text, Bean, Text>{
@Override
protected void reduce(Bean key, Iterable<Text> values, Reducer<Bean, Text, Bean, Text>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
for(Text param:values) {
context.write(key, param);
}
}
}
//创建Partitioner分区类
static class keyParation extends Partitioner<Bean, Text>{
private static Map<String,Integer> partitionerMap = new HashMap<String,Integer>();
static {
partitionerMap.put("G1", 0);
partitionerMap.put("G2", 1);
partitionerMap.put("G3", 2);
}
@Override
public int getPartition(Bean key, Text value, int numPartitions) {
// TODO Auto-generated method stub
Integer result = null;
result = partitionerMap.get(key.getDomain().toString());
result = result==null?4:result;
return result;
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration(); //创建配置类
conf.set("mapred.jar", "E://jars/keyGroupAndSort.jar"); //指定任务执行的jar
Job job = Job.getInstance(conf); //通过配置类创建任务
job.setJarByClass(parationKeyGroupAndSort.class); //指定任务jar的启动类
job.setMapperClass(KeyMapper.class); //指定任务执行的Mapper类
job.setReducerClass(keyReduce.class); //指定任务执行的Reduce处理类
job.setPartitionerClass(keyParation.class); //指定任务指定的Partitioner分区类
// job.setMapOutputKeyClass(Bean.class); //指定Mapper类执行后生成的key类型
// job.setMapOutputValueClass(Text.class); //指定Mapper类执行后生成的value类型
job.setOutputKeyClass(Bean.class); //指定任务执行完成后生成的key类型
job.setOutputValueClass(Text.class); //指定任务执行完成后生成的key类型
job.setNumReduceTasks(4); //指定reduce task的数量
Path inPath = new Path("/usr/lius/parationData"); //指定任务指行需要处理的数据在HDFS上存储的位置
Path outPath = new Path("/usr/lius/parationKeyGroupAndSort");//指定任务指行后生成的数据结果需要保存在HDFS上的位置
FileSystem fs = outPath.getFileSystem(conf); //通过Path与配置类获取HDFS的操作类
if(!fs.exists(inPath)) {
fs.copyFromLocalFile(new Path("E://paration.txt"), inPath);//将本地最新数据上传到HDFS中
}
if(fs.exists(outPath)) { //HDFS中结果数据存在就删除
fs.delete(outPath, true);
}
FileInputFormat.setInputPaths(job, inPath); //任务与处理数据路径进行绑定
FileOutputFormat.setOutputPath(job, outPath); //任务与结果数据路径进行绑定
job.waitForCompletion(true); //指定等待任务执行完成
}
}