package bin;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Sort {
// 对输入文件中数据进行排序。输入文件中的每行内容均为一个数字,即一个数据。
// 要求在输出中每行有两个间隔的数字,其中,第一个代表原始数据在原始数据集中的位次,第二个代表原始数据。
// private static int a;
public static class SortMap extends Mapper<Object, Text, IntWritable, IntWritable> {
//这个参数第二个为甚么要用Text而不是IntWritable呢??
private static IntWritable data = new IntWritable();
public void map(Object key,Text value,Context context) {
String line = value.toString();
data.set(Integer.parseInt(line));//sting类型转换为int类型,要求string文本中前缀不能有空格,而且也因此要求txt文件中不能有空行
try {
context.write(data, new IntWritable(1));
} catch (IOException | InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
public static class SortReduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable>{
private static IntWritable lineNumber=new IntWritable(1);
@SuppressWarnings("unused")
public void reduce(IntWritable key,Iterable<IntWritable> values,Context context) {
IntWritable previous =new IntWritable(key.get()-1);
for (IntWritable value : values) {
if (key.get()!=previous.get()) {
try {
context.write(lineNumber, key);
} catch (IOException | InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
lineNumber =new IntWritable(lineNumber.get()+1);
previous=key;
}
}
}
/**
* @param args
* @throws IOException
* @throws InterruptedException
* @throws ClassNotFoundException
*/
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// TODO Auto-generated method stub
Configuration configuration=new Configuration();
String[] otherArgs=new GenericOptionsParser(configuration, args).getRemainingArgs();
if (otherArgs.length!=2) {
System.out.println("Usage: Sort <in> <out>");
System.exit(2);
}
Job job=new Job(configuration, "Tacert Sort");
job.setJarByClass(Sort.class);
job.setMapperClass(SortMap.class);
// job.setCombinerClass(SortReduce.class); 这里加上combiner之后结果是错的
job.setReducerClass(SortReduce.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true)? 0 : 1);
}
}
// My beginner Hadoop program — simple data sort (Sort).
// (Originally published as a blog post; latest recommended article dated 2023-12-25 15:57:54.)