// Merge a set of small files into one large file.
/**
-
将大量的小文件合并
-
@author DOIT_HANG_GE
-
@version 2019年3月1日
*/
public class FileMerger {
public static class FileMapper extends Mapper<LongWritable, Text, Text, Text> {
String fileName = null;
StringBuilder sb = new StringBuilder();
@Override
protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
FileSplit fs = (FileSplit) context.getInputSplit();
fileName = fs.getPath().getName();
}
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
sb.append(value.toString() + “\t”);
}
@Override
protected void cleanup(Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
context.write(new Text(fileName), new Text(sb.toString()));
}
}
public static class FileReducer extends Reducer<Text, Text, Text, Text>{
@Override
protected void reduce(Text key, Iterableiters, Reducer<Text, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
context.write(key, iters.iterator().next());
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
/ conf.set(“mapreduce.framework.name”,“local”);
conf.set(“fs,defaultFS”, “file:///”);/
Job job = Job.getInstance(conf);
job.setMapperClass(FileMapper.class);
job.setReducerClass(FileReducer.class);job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, new Path("D:\\data\\merger\\input")); FileOutputFormat.setOutputPath(job, new Path("D:\\data\\merger\\output")); job.setNumReduceTasks(1); boolean b = job.waitForCompletion(true); System.exit(b?0:-1);
}
}