任务描述:
给定一组"年份 数值"格式的数据,使用自定义 Partitioner 按照年份的不同将记录分别存放在不同的输出文件里
Example data (每行: 年份 数值):
2013 1
2013 5
2014 5
2014 8
2015 9
2015 4
Code:
package mrTest;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class zidingyiPartition {
public static class myPartition extends Partitioner<LongWritable, LongWritable>{
public int getPartition(LongWritable key, LongWritable value, int numTaskReduces) {
// TODO Auto-generated method stub
if(key.get()==2013){
return 0;
}else if(key.get()==2014){
return 1;
}else{
return 2;
}
}
}
public static class Map extends Mapper<Object, Text, LongWritable,LongWritable>{
public void map(Object key, Text value, Context context) throws IOException, InterruptedException{
String[] line = value.toString().split("\t");
context.write( new LongWritable(Integer.parseInt(line[0])) , new LongWritable(Integer.parseInt(line[1])) );
}
}
public static class Reduce extends Reducer<LongWritable, LongWritable, LongWritable, LongWritable>{
public void reduce(LongWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException{
for (LongWritable longWritable : values) {
context.write(key, longWritable);
}
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// TODO Auto-generated method stub
Job job = new Job();
job.setJarByClass(zidingyiPartition.class);
// 1
FileInputFormat.addInputPath(job, new Path(args[0]));
// 2
job.setMapperClass(Map.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(LongWritable.class);
// 3
job.setPartitionerClass(myPartition.class);
// 4
// 5
job.setNumReduceTasks(3);
// 6
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(LongWritable.class);
// 7
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 8
System.exit(job.waitForCompletion(true)? 0 : 1);
}
}
结果展示:
扫一扫,关注微信公众号!号主专注于搜索和推荐系统,尝试使用算法去更好地服务于用户,包括但不局限于机器学习、深度学习、强化学习、自然语言理解、知识图谱,还不定时分享技术、资料、思考等文章!
【技术服务】,详情点击查看:https://mp.weixin.qq.com/s/PtX9ukKRBmazAWARprGIAg