Hadoop MapReduce: Tomcat Log Analysis

1. Unpack Hadoop on your local Windows machine.


2. In cmd, switch to Hadoop's bin directory and run hadoop version to check that the runtime environment is set up correctly.

3. In cmd, switch to Hadoop's sbin directory and run start-all.cmd to start Hadoop in single-node mode.
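
You can check that the daemons actually came up with the JDK's jps command; it should list Java processes such as NameNode, DataNode, ResourceManager and NodeManager (the exact set depends on your Hadoop version and configuration):

jps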

4. Create an HDFS directory

hadoop fs -mkdir /hdfs

5. Upload the Tomcat logs

hadoop fs -put  f:/tomcat/log/localhost*  /hdfs
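
To confirm the logs landed in HDFS, list the target directory:

hadoop fs -ls /hdfs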

6. Write the MapReduce code that analyzes the Tomcat logs. It consists of three classes, shown below.


Mapper_.java

package com.fw.hadoop.example.log;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Mapper_ extends Mapper<LongWritable, Text, Text, IntWritable> {

	// Emit one record per access-log line, keyed by "clientIP-HTTPMethod".
	@Override
	public void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String line = value.toString();
		String method = "NONE";
		String ip = ip(line);
		if (post(line)) {
			method = "POST";
		}
		if (get(line)) {
			method = "GET";
		}
		// Emit 1 per line so the reducer (and combiner) can simply sum the counts.
		context.write(new Text(ip + "-" + method), new IntWritable(1));
	}

	// Extract the client IP from the start of the access-log line.
	// Note: "[0-9,.]*" matches an empty string when the line does not start with a digit.
	public static String ip(String str) {
		Pattern pattern = Pattern.compile("[0-9,.]*");
		Matcher matcher = pattern.matcher(str);
		String result = "";
		if (matcher.find()) {
			result = matcher.group(0);
		}
		return result;
	}

	// True if the request method logged after the timestamp is POST.
	public static boolean post(String str) {
		Pattern pattern = Pattern.compile("] \"POST");
		Matcher matcher = pattern.matcher(str);
		return matcher.find();
	}

	// True if the request method logged after the timestamp is GET.
	public static boolean get(String str) {
		Pattern pattern = Pattern.compile("] \"GET");
		Matcher matcher = pattern.matcher(str);
		return matcher.find();
	}
}
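
For reference, given a typical Tomcat access-log line such as the following (an illustrative line, not taken from the logs above), ip() pulls the client address off the front of the line and get()/post() look for the ] "GET / ] "POST fragment that follows the timestamp:

192.168.32.41 - - [18/Sep/2017:10:12:33 +0800] "GET /index.jsp HTTP/1.1" 200 1024

For this line the mapper emits the key 192.168.32.41-GET with a count of 1. Lines that do not start with an IP make the regex [0-9,.]* match an empty string, which is where the bare "-NONE" key in the results below comes from.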



Reducer_.java

package com.fw.hadoop.example.log;


import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Reducer_ extends Reducer<Text, IntWritable, Text, IntWritable> {

	// Sum the per-line counts for each "IP-method" key.
	// Because the sum is associative, this class can also be used as the combiner.
	@Override
	public void reduce(Text key, Iterable<IntWritable> values, Context context)
			throws IOException, InterruptedException {
		int count = 0;
		for (IntWritable val : values) {
			count += val.get();
		}
		context.write(key, new IntWritable(count));
	}
}



Main.java

package com.fw.hadoop.example.log;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Main {
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "tomcat log analysis");
		job.setJarByClass(Main.class);
		job.setMapperClass(Mapper_.class);
		// The reducer only sums counts, so it can also serve as the combiner.
		job.setCombinerClass(Reducer_.class);
		job.setReducerClass(Reducer_.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		// Read the uploaded Tomcat logs from HDFS and write the result to /hdfs/log1.
		FileInputFormat.addInputPath(job, new Path("hdfs://0.0.0.0:19000/hdfs/localhost*"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://0.0.0.0:19000/hdfs/log1"));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
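
Note that MapReduce refuses to start if the output directory already exists, so if you re-run the job, remove the old output first:

hadoop fs -rm -r /hdfs/log1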




7. When the job has finished, view the output at http://localhost:50070/explorer.html#/hdfs/log1
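
You can also dump the result from the command line; the reducer writes its output to part-r-* files inside the output directory:

hadoop fs -cat /hdfs/log1/part-r-*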

The analysis results from my local logs were:


-NONE 2
1.192.34.102-GET 2
1.193.127.216-GET 2
101.199.108.119-GET 2
101.199.108.120-GET 2
101.199.108.52-GET 2
101.199.108.54-GET 2
101.199.112.45-GET 2
101.199.112.52-GET 2
101.226.102.140-GET 3
101.226.102.145-GET 2
101.226.102.146-GET 3
101.226.102.237-GET 2
101.226.102.52-GET 2
101.226.102.79-GET 2
101.226.102.89-GET 2
101.226.102.94-GET 2
101.226.102.97-GET 2
101.226.114.166-GET 2
101.226.125.109-GET 2
101.226.125.113-GET 2
101.226.125.118-GET 2
101.226.125.119-GET 2
101.226.125.120-GET 2
101.226.125.15-GET 2
101.226.125.18-GET 2
101.226.125.19-GET 2
101.226.33.218-GET 2
101.226.33.220-GET 2
101.226.33.223-GET 2
101.226.65.102-GET 2
101.226.66.173-GET 2
101.226.66.177-GET 2
101.226.66.178-GET 2
101.226.66.181-GET 2
101.226.69.109-GET 2
101.226.69.112-GET 2
101.226.85.67-GET 2
101.226.89.14-GET 2
101.226.93.201-GET 2
101.226.93.241-GET 2
101.226.99.196-GET 2
103.221.141.147-GET 3
103.221.141.147-POST 3
106.120.160.109-GET 4
106.120.161.68-GET 3
112.65.193.15-GET 2
115.60.62.127-GET 2
117.185.27.113-GET 4
117.185.27.115-GET 4
140.207.118.16-GET 2
140.207.185.123-GET 2
140.207.185.125-GET 3
140.207.185.126-GET 2
140.207.54.140-GET 2
140.207.54.144-GET 2
140.207.54.158-GET 4
140.207.54.199-GET 2
140.207.54.218-GET 3
140.207.63.102-GET 2
140.207.63.103-GET 2
163.177.82.107-GET 2
163.177.82.107-NONE 2
171.10.205.79-POST 2
171.10.4.159-GET 2
171.10.4.159-POST 2
171.10.69.92-GET 2
171.10.69.92-POST 2
171.10.92.181-GET 2
171.10.92.181-POST 2
171.11.2.238-GET 2
171.11.2.238-POST 2
171.11.3.151-GET 2
171.11.3.151-POST 2
171.11.4.91-GET 2
172.16.30.1-GET 8
172.16.30.1-POST 8
182.118.20.156-GET 2
183.12.116.19-GET 2
183.12.116.19-POST 2
183.57.53.222-GET 2
192.168.240.224-GET 2
192.168.31.131-GET 3
192.168.31.193-GET 3
192.168.31.193-POST 3
192.168.32.100-GET 5
192.168.32.100-POST 2
192.168.32.108-GET 5
192.168.32.111-GET 2
192.168.32.111-POST 2
192.168.32.20-GET 4
192.168.32.20-POST 2
192.168.32.223-GET 2
192.168.32.37-GET 3
192.168.32.37-POST 2
192.168.32.41-GET 6
192.168.32.41-POST 5
192.168.32.63-GET 3
220.181.132.196-GET 2
222.66.141.10-GET 2
223.104.105.29-GET 2
223.104.105.29-POST 2
59.58.193.90-GET 2
59.58.193.90-POST 2
59.78.209.100-GET 2
61.151.217.45-GET 2
61.151.226.16-GET 2
61.151.226.191-GET 2
61.158.148.109-POST 2
61.158.148.116-GET 2
61.158.148.116-POST 2
61.158.148.43-GET 2
61.158.148.43-POST 2
61.158.148.48-GET 2
61.158.148.48-POST 2
61.158.148.51-POST 3
61.158.148.90-POST 2
61.158.149.129-POST 2
61.158.149.147-GET 2
61.158.149.147-POST 2
61.158.149.169-GET 2
61.158.149.169-POST 2
61.158.149.190-GET 2
61.158.149.190-POST 2
61.158.149.230-GET 2
61.158.149.230-POST 2
61.158.149.239-GET 2
61.158.149.239-POST 2
61.158.149.26-POST 2
61.158.149.29-POST 2
61.158.149.47-GET 2
61.158.149.47-POST 2
61.158.152.100-GET 2
61.158.152.100-POST 2
61.158.152.13-GET 2
61.158.152.13-POST 2
61.158.152.57-GET 2
61.158.152.57-POST 2
61.158.152.76-POST 2
61.178.77.18-GET 6
61.178.77.18-POST 4


8. If you want to analyze local files instead, it is even simpler: when configuring FileInputFormat and FileOutputFormat, just use full local paths directly.

When running locally, Hadoop exists only as a plain Java process!


package com.fw.hadoop.example.log;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Main {
	public static void main(String args[]) throws IOException, ClassNotFoundException, InterruptedException{
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "tomcat log analysis");
	    job.setJarByClass(Main.class);
	    job.setMapperClass(Mapper_.class);
	    job.setCombinerClass(Reducer_.class);
	    job.setReducerClass(Reducer_.class);
	    job.setOutputKeyClass(Text.class);
	    job.setOutputValueClass(IntWritable.class);
	   /* FileInputFormat.addInputPath(job, new Path("hdfs://0.0.0.0:19000/hdfs/localhost*"));
	    FileOutputFormat.setOutputPath(job, new Path("hdfs://0.0.0.0:19000/hdfs/log1"));*/
	    FileInputFormat.addInputPath(job, new Path("F:\\scrt_downLoad\\localhost*"));
	    FileOutputFormat.setOutputPath(job, new Path("F:\\10"));
	    System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

9. Package the job as a jar, upload it to the Hadoop server, and run it from the command line.

In Eclipse: Export -> JAR file, select the three classes Main.java, Reducer_.java and Mapper_.java -> name it tomcat-log.jar.

Run the jar with the hadoop jar command:

hadoop jar tomcat-log.jar com.fw.hadoop.example.log.Main
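
If you would rather not hard-code the HDFS paths into the jar, a common variation (a sketch, not part of the code above) is to read them from the command-line arguments inside main:

// Replace the two hard-coded Path lines in main with:
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

// Then invoke the job as:
// hadoop jar tomcat-log.jar com.fw.hadoop.example.log.Main /hdfs/localhost* /hdfs/log1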

10. If the job fails to run because of the native disk I/O permission check, override the org.apache.hadoop.io.nativeio.NativeIO class and remove the disk read/write permission check.
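
On Hadoop 2.x for Windows this usually means copying NativeIO.java from the Hadoop sources into your own project under the same package org.apache.hadoop.io.nativeio and changing the access check in the nested Windows class so that it always passes (a sketch; the exact method location and signature can differ between Hadoop versions):

// In the copied org.apache.hadoop.io.nativeio.NativeIO.Windows class:
public static boolean access(String path, AccessRight desiredAccess)
		throws IOException {
	// Skip the native disk read/write permission check so the job can run
	// as a plain Java process on Windows.
	return true;
}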


