1. 需求
原始日志
baidu CN E [17/Jul/2018:17:07:50 +0800] 223.104.18.110 v2.go2yd.com http://v1.go2yd.com/user_upload/1531633977627104fdecdc68fe7a2c4b96b2226fd3f4c.mp4_bd.mp4 16384
目标格式
baidu CN E 20180717170750 223.104.18.110 v2.go2yd.com http://v1.go2yd.com/user_upload/1531633977627104fdecdc68fe7a2c4b96b2226fd3f4c.mp4_bd.mp4 16384
原始日志经过 Mapper 处理成目标格式，分三步：
a. 编写日志处理工具类（LogUtils）
b. 编写 Mapper
c. 编写 Driver
public class LogUtils {

    // DateTimeFormatter is immutable and thread-safe (unlike SimpleDateFormat),
    // so shared static instances are safe even if Hadoop reuses this class
    // across threads or tasks.
    private static final java.time.format.DateTimeFormatter SOURCE_FORMAT =
            java.time.format.DateTimeFormatter.ofPattern("dd/MMM/yyyy:HH:mm:ss Z", java.util.Locale.ENGLISH);
    private static final java.time.format.DateTimeFormatter TARGET_FORMAT =
            java.time.format.DateTimeFormatter.ofPattern("yyyyMMddHHmmss");

    // A valid raw log line has exactly 8 tab-separated fields.
    private static final int EXPECTED_FIELDS = 8;

    /**
     * Converts one raw tab-separated log line to the target format: the bracketed
     * access time (e.g. "[17/Jul/2018:17:07:50 +0800]") becomes "yyyyMMddHHmmss",
     * all other fields pass through unchanged.
     *
     * <p>The time is formatted in the UTC offset carried by the log line itself,
     * so the result no longer depends on the JVM's default time zone (the old
     * SimpleDateFormat-based version formatted in the platform zone).
     *
     * @param log raw log line with 8 tab-separated fields
     * @return the cleaned line, or "" when the line is malformed or the time
     *         field cannot be parsed
     */
    public String parse(String log) {
        String result = "";
        try {
            String[] splits = log.split("\t");
            if (splits.length < EXPECTED_FIELDS) {
                // Previously a short line escaped the catch as an
                // ArrayIndexOutOfBoundsException; treat it as unparseable instead.
                return result;
            }
            String cdn = splits[0];
            String region = splits[1];
            String level = splits[2];
            String timestr = splits[3];
            // Strip the surrounding "[" and "]".
            String time = timestr.substring(1, timestr.length() - 1);
            time = java.time.OffsetDateTime.parse(time, SOURCE_FORMAT).format(TARGET_FORMAT);
            String ip = splits[4];
            String domain = splits[5];
            String url = splits[6];
            String traffic = splits[7];
            result = String.join("\t", cdn, region, level, time, ip, domain, url, traffic);
        } catch (java.time.format.DateTimeParseException e) {
            // Unparseable time: log and fall through to the empty result so the
            // mapper can drop the record.
            e.printStackTrace();
        }
        return result;
    }
}
public class LogETLMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

    // Reused across records: allocating a parser and an output Text per input
    // line creates needless garbage on large inputs.
    private final LogUtils logUtils = new LogUtils();
    private final Text outputValue = new Text();

    /**
     * Cleans one raw log line: lines with exactly 8 tab-separated fields are
     * reformatted by {@link LogUtils#parse(String)} and emitted; everything
     * else (short lines, lines whose time field fails to parse) is dropped.
     *
     * @param key     byte offset of the line in the input split (unused)
     * @param value   raw log line
     * @param context Hadoop output collector
     * @throws IOException          on write failure
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Materialize the line once instead of calling value.toString() twice.
        String line = value.toString();
        if (line.split("\t").length == 8) {
            String result = logUtils.parse(line);
            if (StringUtils.isNotBlank(result)) {
                outputValue.set(result);
                context.write(NullWritable.get(), outputValue);
            }
        }
    }
}
public class LogETLDirver {

    /**
     * Driver for the map-only log-cleaning job.
     *
     * <p>Usage: {@code hadoop jar xxx.jar <input> <output>}. The output
     * directory is deleted first if it already exists, and the process exit
     * code reflects job success (0) or failure (1) — the original ignored
     * the {@code waitForCompletion} result and always exited 0.
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.print("请输入2个参数 input output");
            System.exit(1);
        }
        String input = args[0];   // e.g. hadoop/input/
        String output = args[1];  // e.g. hadoop/output/d=20180717

        Configuration configuration = new Configuration();

        // MapReduce refuses to start when the output directory exists, so
        // remove any leftover from a previous run.
        Path outputPath = new Path(output);
        FileSystem fileSystem = FileSystem.get(configuration);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(LogETLDirver.class);
        job.setMapperClass(LogETLMapper.class);
        // Pure ETL: no aggregation, so skip the shuffle/reduce phase entirely.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, outputPath);

        // Propagate job status to the shell instead of always exiting 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
简单的日志处理就写好了，用 hadoop jar xxx.jar /input /output 执行即可。