package test;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * MapReduce job that normalizes comma-separated log records.
 *
 * <p>Each input line is parsed by {@link LogParser}, re-emitted as a single
 * space-separated record, and identical records are collapsed by the reducer.
 */
public class Filter {

    /** Input path used when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "hdfs://localhost:9000/user/hadoop/in/Result";

    /** Output path used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "hdfs://localhost:9000/user/hadoop/out";

    /**
     * Parses every input line and emits the six parsed fields, joined by single
     * spaces, as the output key. The output value is always {@link NullWritable}.
     */
    public static class Map extends Mapper<Object, Text, Text, NullWritable> {

        /** Reused for every record so that no parser is allocated per line. */
        private final LogParser parser = new LogParser();

        /** Reused output key; Hadoop serializes it during {@code context.write}. */
        private final Text outKey = new Text();

        /**
         * Converts one log line into a space-separated record.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one raw log line
         * @param context sink for the normalized record
         * @throws IOException              if the framework fails to write the record
         * @throws InterruptedException     if the task is interrupted while writing
         * @throws IllegalArgumentException if the line cannot be parsed by {@link LogParser}
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            final String[] fields = parser.parse(value.toString());

            final StringBuilder record = new StringBuilder();
            for (int i = 0; i < fields.length; i++) {
                if (i > 0) {
                    record.append(' ');
                }
                record.append(fields[i]);
            }

            outKey.set(record.toString());
            context.write(outKey, NullWritable.get());
        }
    }

    /**
     * Emits each distinct key exactly once, which removes duplicate records
     * produced by the map phase.
     */
    public static class Reduce extends Reducer<Text, NullWritable, Text, NullWritable> {

        /**
         * Writes the key once, regardless of how many values were grouped under it.
         *
         * @param key     a normalized record
         * @param values  the grouped values (not read)
         * @param context sink for the de-duplicated record
         * @throws IOException          if the framework fails to write the record
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args optional overrides: {@code args[0]} is the input path and
     *             {@code args[1]} is the output path; the built-in HDFS locations
     *             are used for any argument that is absent
     * @throws IOException            if the job cannot be configured or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        System.out.println("start");

        // NOTE(review): this constructor is deprecated in newer Hadoop releases in
        // favour of Job.getInstance; it is kept so the job builds on older clusters.
        Job job = new Job(conf, "filter");
        job.setJarByClass(Filter.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        Path in = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);
        Path out = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Alternative mapper that drops static-resource requests and trims the HTTP
     * method prefix and protocol suffix from the third parsed field.
     *
     * <p>This class is not registered by {@link #main(String[])}.
     *
     * <p>NOTE(review): the code treats {@code parsed[2]} as a request line, but
     * {@link LogParser#parse(String)} places the "day" field at index 2 - confirm
     * which record layout this mapper is meant for before using it.
     */
    static class MyMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        LogParser logParser = new LogParser();
        Text outputValue = new Text();

        /**
         * Filters and trims one record, then writes its first three fields separated by tabs.
         *
         * @param key     input key, passed through unchanged as the output key
         * @param value   one raw log line
         * @param context sink for the cleaned record
         * @throws IOException          if the framework fails to write the record
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            final String[] parsed = logParser.parse(value.toString());

            // Step 1: drop requests for static resources.
            if (parsed[2].startsWith("GET /static/") || parsed[2].startsWith("GET /uc_server")) {
                return;
            }

            // Step 2: strip the leading HTTP method prefix.
            if (parsed[2].startsWith("GET /")) {
                parsed[2] = parsed[2].substring("GET /".length());
            } else if (parsed[2].startsWith("POST /")) {
                parsed[2] = parsed[2].substring("POST /".length());
            }

            // Step 3: strip the trailing protocol suffix.
            if (parsed[2].endsWith(" HTTP/1.1")) {
                parsed[2] = parsed[2].substring(0, parsed[2].length() - " HTTP/1.1".length());
            }

            // Step 4: write only the first three fields.
            outputValue.set(parsed[0] + "\t" + parsed[1] + "\t" + parsed[2]);
            context.write(key, outputValue);
        }
    }

    /**
     * Companion reducer for {@link MyMapper}: writes every grouped value as an
     * output key with a {@link NullWritable} value.
     *
     * <p>This class is not registered by {@link #main(String[])}.
     */
    static class MyReducer extends Reducer<LongWritable, Text, Text, NullWritable> {

        /**
         * Emits each value of the group as its own output record.
         *
         * @param k2      the grouping key (not written)
         * @param v2s     the records grouped under {@code k2}
         * @param context sink for the records
         * @throws IOException          if the framework fails to write a record
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(LongWritable k2, Iterable<Text> v2s, Context context)
                throws IOException, InterruptedException {
            for (Text v2 : v2s) {
                context.write(v2, NullWritable.get());
            }
        }
    }

    /**
     * Parser for comma-separated log lines.
     *
     * <p>NOTE(review): the two shared {@link SimpleDateFormat} constants are not
     * synchronized; {@code SimpleDateFormat} is documented as unsafe for
     * concurrent use, so do not call {@link #parse(String)} from several threads
     * at once without external locking.
     */
    static class LogParser {

        /** Format of the timestamp as it appears in the raw log (English month names). */
        public static final SimpleDateFormat FORMAT =
                new SimpleDateFormat("d/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);

        /** Format of the timestamp in the parsed output. */
        public static final SimpleDateFormat dateformat1 =
                new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        /** Text that terminates the timestamp field in a raw line. */
        private static final String TIMEZONE_MARKER = " +0800,";

        /** Fields 0 through 4 are read by index, so at least five must be present. */
        private static final int MIN_FIELD_COUNT = 5;

        /**
         * Parses a timestamp written in the raw log format.
         *
         * @param string timestamp text matching {@link #FORMAT}
         * @return the parsed date, never {@code null}
         * @throws IllegalArgumentException if the text does not match {@link #FORMAT}
         */
        private Date parseDateFormat(String string) {
            try {
                return FORMAT.parse(string);
            } catch (ParseException e) {
                // Fail with context instead of returning null, which previously
                // surfaced later as an unexplained NullPointerException.
                throw new IllegalArgumentException(
                        "Unparseable log timestamp: '" + string + "'", e);
            }
        }

        /**
         * Parses one log line.
         *
         * @param line a raw comma-separated log line
         * @return a six-element array: ip, time (reformatted with
         *         {@link #dateformat1}), day, traffic, type, id
         * @throws IllegalArgumentException if the line is {@code null}, has fewer
         *         than five comma-separated fields, lacks the {@code " +0800,"}
         *         timestamp terminator, or carries an unparseable timestamp
         */
        public String[] parse(String line) {
            if (line == null) {
                throw new IllegalArgumentException("Log line must not be null");
            }

            final String[] fields = line.split(",");
            if (fields.length < MIN_FIELD_COUNT) {
                throw new IllegalArgumentException(
                        "Expected at least " + MIN_FIELD_COUNT
                                + " comma-separated fields but found " + fields.length
                                + " in line: '" + line + "'");
            }

            final String ip = fields[0].trim();
            final String time = parseTime(line);
            final String day = fields[2].trim();
            final String traffic = fields[3].trim();
            final String type = fields[4].trim();
            final String id = parseId(line);

            return new String[] { ip, time, day, traffic, type, id };
        }

        /**
         * Extracts the timestamp found between the first comma and the
         * {@code " +0800,"} marker and reformats it with {@link #dateformat1}.
         *
         * @param line a raw log line
         * @return the reformatted timestamp
         * @throws IllegalArgumentException if the marker is missing or does not
         *         follow the first comma, or if the timestamp cannot be parsed
         */
        private String parseTime(String line) {
            final int first = line.indexOf(',');
            final int last = line.indexOf(TIMEZONE_MARKER);
            if (last <= first) {
                throw new IllegalArgumentException(
                        "Missing '" + TIMEZONE_MARKER + "' after the first field in line: '"
                                + line + "'");
            }
            final String rawTime = line.substring(first + 1, last).trim();
            return dateformat1.format(parseDateFormat(rawTime));
        }

        /**
         * Extracts the id: the first space-delimited token after the last comma.
         *
         * @param line a raw log line
         * @return the id token, possibly empty
         */
        private String parseId(String line) {
            final String tail = line.substring(line.lastIndexOf(',') + 1).trim();
            return tail.split(" ")[0];
        }
    }
}