MapReduce TopN
package mr.topN;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Command-line driver that configures and submits the TopN MapReduce job.
 *
 * <p>Arguments (after Hadoop generic options have been stripped by
 * {@link ToolRunner}): {@code -i <input> -o <output> [-n <N>]}. When {@code -n}
 * is omitted, the "N" configuration key is left unset and the tasks fall back
 * to their own default.
 *
 * <p>NOTE(review): the class still extends {@link Configuration} so existing
 * references keep compiling, but the configuration actually used for the job
 * is the one handed over through {@link #setConf(Configuration)}.
 */
public class TopNJob extends Configuration implements Tool{
    /** Configuration key under which the requested N is passed to the tasks. */
    private static final String TOP_N_KEY = "N";

    /** Configuration supplied by ToolRunner; created lazily if never set. */
    private Configuration conf;
    private String input = null;
    private String output = null;
    private String topN = null;

    /**
     * Returns the configuration this tool runs with.
     *
     * @return the configuration given to {@link #setConf(Configuration)}, or a
     *         fresh one if none has been set yet
     */
    @Override
    public Configuration getConf() {
        if (conf == null) {
            conf = new Configuration();
        }
        return conf;
    }

    /**
     * Stores the configuration so that options parsed by ToolRunner reach the job.
     *
     * @param conf configuration to use when building the job
     */
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Parses the arguments, builds the job and waits for it to finish.
     *
     * @param arg0 tool arguments: {@code -i input -o output [-n N]}
     * @return 0 if the job succeeded, 1 if it failed, -1 if the arguments were invalid
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] arg0) throws Exception {
        if (!setArgs(arg0) || !checkParam()) {
            return -1;
        }

        Configuration jobConf = getConf();
        // Only forward N when the user actually supplied one.
        if (topN != null && !"".equals(topN.trim())) {
            int n;
            try {
                n = Integer.parseInt(topN.trim());
            } catch (NumberFormatException e) {
                n = -1;
            }
            if (n <= 0) {
                System.out.println("invalid -n value: " + topN);
                printUsage();
                return -1;
            }
            jobConf.setInt(TOP_N_KEY, n);
        }

        Job job = new Job(jobConf, "TopNJob");
        job.setJarByClass(TopNJob.class);
        job.setMapperClass(TopNMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(TopNReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        // The reducer keeps one in-memory ranking, so a global result needs
        // every mapper's candidates to arrive at the same reducer.
        job.setNumReduceTasks(1);

        FileInputFormat.addInputPath(job, new Path(input));

        Path path = new Path(output);
        // Resolve the file system from the path itself so a non-default
        // scheme in the output URI is honoured.
        FileSystem fs = path.getFileSystem(jobConf);
        // An existing output directory is removed recursively before the run.
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job, path);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Verifies that both mandatory paths were supplied, printing a message and
     * the usage line when one is missing.
     *
     * @return true when input and output are both non-blank
     */
    private boolean checkParam() {
        if (input == null || "".equals(input.trim())) {
            System.out.println("no input path!");
            printUsage();
            return false;
        }
        if (output == null || "".equals(output.trim())) {
            System.out.println("no output path!");
            printUsage();
            return false;
        }
        return true;
    }

    /** Prints the one-line usage summary to standard output. */
    private void printUsage() {
        System.out.println("<args> eg -i input -o output [-n top defalt=10]");
    }

    /**
     * Reads {@code -i xx -o xxx -n xx} style arguments into the fields.
     * Unrecognised arguments are ignored.
     *
     * @param args tool arguments
     * @return false when a flag is the last argument and therefore has no value
     */
    private boolean setArgs(String[] args) {
        for (int i = 0; i < args.length; i++) {
            String flag = args[i];
            boolean known = "-i".equals(flag) || "-o".equals(flag) || "-n".equals(flag);
            if (!known) {
                continue;
            }
            if (i + 1 >= args.length) {
                System.out.println("missing value for " + flag);
                printUsage();
                return false;
            }
            // Consume the value so it is never re-interpreted as a flag.
            String value = args[++i];
            if ("-i".equals(flag)) {
                input = value;
            } else if ("-o".equals(flag)) {
                output = value;
            } else {
                topN = value;
            }
        }
        return true;
    }

    /**
     * Entry point: runs the tool through ToolRunner and exits with its status.
     *
     * @param args command-line arguments
     * @throws Exception if the tool fails with an exception
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new TopNJob(), args));
    }
}
package mr.topN;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that keeps the N largest integers seen in its input split and emits
 * them once, in {@link #cleanup}, as (value, value) pairs.
 *
 * <p>Each non-blank input line is parsed with {@link Integer#parseInt}; a line
 * that is not a valid integer makes {@code map} throw NumberFormatException.
 */
public class TopNMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
    /** Number of values to keep, read from the "N" configuration key (default 10). */
    int len;
    /**
     * Sorted buffer of {@code len + 1} slots. Slot 0 is scratch space for the
     * incoming value; after sorting, slots 1..len hold the largest values.
     */
    int[] top;
    /** How many slots hold real input values rather than padding, at most {@code len}. */
    private int count;

    /**
     * Emits the retained values in ascending order.
     *
     * @param context task context used for output
     */
    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        // Real values occupy the last `count` slots; skip the padding below them
        // so a split with fewer than N records does not emit filler entries.
        for (int x = len - count + 1; x <= len; x++) {
            context.write(new IntWritable(top[x]), new IntWritable(top[x]));
        }
    }

    /**
     * Parses one line as an integer and offers it to the buffer.
     *
     * @param key     input key (unused)
     * @param value   one line of text
     * @param context task context (unused here; output happens in cleanup)
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.length() > 0) {
            int payment = Integer.parseInt(line);
            add(payment);
        }
    }

    /**
     * Offers a value to the buffer, keeping only the {@code len} largest.
     *
     * @param payment candidate value
     */
    private void add(int payment) {
        // Once the buffer is full, a value no larger than the current minimum
        // cannot change slots 1..len, so the sort can be skipped.
        if (count == len && (len == 0 || payment <= top[1])) {
            return;
        }
        top[0] = payment;
        Arrays.sort(top);
        if (count < len) {
            count++;
        }
    }

    /**
     * Reads N from the configuration and allocates the buffer.
     *
     * @param context task context providing the configuration
     */
    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        // Clamp so a negative N cannot break allocation or indexing.
        len = Math.max(0, context.getConfiguration().getInt("N", 10));
        top = new int[len + 1];
        // Pad with the smallest int so negative inputs still outrank padding.
        Arrays.fill(top, Integer.MIN_VALUE);
        count = 0;
    }
}
package mr.topN;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that merges the candidate values it receives into a single ranking
 * and writes it in {@link #cleanup} as (rank, value) pairs, rank 1 being the
 * largest value.
 *
 * <p>Every occurrence of a key counts: a key that arrives with several values
 * is added once per value, so duplicates are ranked separately.
 * NOTE(review): this assumes duplicates are meant to count — confirm.
 */
public class TopNReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
    /** Number of values to keep, read from the "N" configuration key (default 10). */
    int len;
    /**
     * Sorted buffer of {@code len + 1} slots. Slot 0 is scratch space for the
     * incoming value; after sorting, slots 1..len hold the largest values.
     */
    int[] top;
    /** How many slots hold real values rather than padding, at most {@code len}. */
    private int count;

    /**
     * Writes the retained values from largest to smallest with 1-based ranks.
     *
     * @param context task context used for output
     */
    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        // Only the last `count` slots are real; stop before the padding so
        // fewer than N inputs produce fewer than N output rows.
        for (int i = len; i > len - count; i--) {
            context.write(new IntWritable(len - i + 1), new IntWritable(top[i]));
        }
    }

    /**
     * Adds the key to the ranking once for each value grouped under it.
     *
     * @param arg0 the candidate value (used as the key)
     * @param arg1 the values grouped under the key; only their number matters
     * @param arg2 task context (unused here; output happens in cleanup)
     */
    @Override
    protected void reduce(
            IntWritable arg0,
            Iterable<IntWritable> arg1,
            Context arg2)
            throws IOException, InterruptedException {
        int payment = arg0.get();
        // One add per grouped value, so equal candidates are not collapsed.
        for (IntWritable ignored : arg1) {
            add(payment);
        }
    }

    /**
     * Offers a value to the buffer, keeping only the {@code len} largest.
     *
     * @param payment candidate value
     */
    private void add(int payment) {
        // Once the buffer is full, a value no larger than the current minimum
        // cannot change slots 1..len, so the sort can be skipped.
        if (count == len && (len == 0 || payment <= top[1])) {
            return;
        }
        top[0] = payment;
        Arrays.sort(top);
        if (count < len) {
            count++;
        }
    }

    /**
     * Reads N from the configuration and allocates the buffer.
     *
     * @param context task context providing the configuration
     */
    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        // Clamp so a negative N cannot break allocation or indexing.
        len = Math.max(0, context.getConfiguration().getInt("N", 10));
        top = new int[len + 1];
        // Pad with the smallest int so negative inputs still outrank padding.
        Arrays.fill(top, Integer.MIN_VALUE);
        count = 0;
    }
}