package com.zyc.hadoop.mapreduce.flow;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/** * Created with IntelliJ IDEA. * * @Author: zyc2913@163.com * @Date: 2020/9/11 17:16 * @Version: 1.0 * @Description: SequenceFileInputFormat将小文件合并成大文件再分片 */ public class WebLogFlowSort { public static void main(String[] args) throws Exception { //1.获取job实例 Configuration conf = new Configuration(); Job job = Job.getInstance(conf);
//2.设置job运行的主类 job.setJarByClass(WebLogLoadFlow.class);
//3.设置Mapper的主类 job.setMapperClass(WLFMapper.class);
//4.设置Reducer的主类 job.setReducerClass(WebLogLoadFlow.WLLFReducer.class);
//6.设置Reducer输出的类型 job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class);
//7.设置job的输入路径 FileInputFormat.setInputPaths(job,new Path("C:\\Users\\Administrator\\Desktop\\loadflows"));
//8.设置job的输出路径 FileOutputFormat.setOutputPath(job,new Path("C:\\Users\\Administrator\\Desktop\\loadflowsort"));
//9.设置MapReduce的输入格式(默认使用的是TextInPutFormat) job.setInputFormatClass(SequenceFileInputFormat.class);
System.exit(job.waitForCompletion(true) ? 0 : 1 ); } public static class WLFMapper extends Mapper<LongWritable,Text,LongWritable,Text> { LongWritable k = new LongWritable(); Text v = new Text();
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] strs = value.toString().split("\t"); k.set(Long.parseLong(strs[3])); v.set(strs[0]); context.write(k,v); } }
}