将tomcat-access.log文件导入到HBase中
本次实验使用伪分布式模式;由于机器内存较少,每个日志文件单独提交一个MapReduce作业,逐个导入到HBase中
1、DRIVER
package com.sdnware.start04.hbase.log;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.LinkedList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
//import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Driver that imports Tomcat access logs stored on HDFS into the HBase table
 * {@code sdnware:tomcat_log}.
 *
 * <p>Every entry found under {@code /tomcat/access_log} is submitted as its own
 * MapReduce job, and the jobs run strictly one after another. A failing job is
 * logged and does not prevent the remaining files from being processed.
 */
public class ParseTomcatAccessLogToHBaseDriver {
    private static final Logger LOG = LoggerFactory.getLogger(ParseTomcatAccessLogToHBaseDriver.class);

    /**
     * Lists the access-log directory and runs one import job per entry.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            //System.setProperty("hadoop.home.dir", "E:/soft/hadoop-2.6.0");
            Configuration conf = new Configuration();
            conf.set(TableOutputFormat.OUTPUT_TABLE, "sdnware:tomcat_log"); // target HBase table
            conf.set("hbase.zookeeper.quorum", "192.168.100.205");
            conf.set("hbase.zookeeper.property.clientPort", "2181"); // ZooKeeper client port
            conf.set("hbase.master", "192.168.100.205:60000");
            conf.set("hbase.rootdir", "hdfs://192.168.100.221:9000/hbase");
            FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop.sdnware.com:9000"), conf);
            FileStatus[] files = fileSystem.listStatus(new Path("/tomcat/access_log"));
            for (FileStatus file : files) {
                String jobName = "";
                try {
                    Job job = getJob(conf, file);
                    jobName = job.getJobName();
                    LOG.info("JOB-NAME:{}.........START", jobName);
                    // waitForCompletion reports failure through its return value, not an exception.
                    if (job.waitForCompletion(true)) {
                        LOG.info("JOB-NAME:{}.........END", jobName);
                    } else {
                        LOG.error("JOB-NAME:{}.........FAILED", jobName);
                    }
                } catch (InterruptedException e) {
                    // Restore the interrupt flag and stop submitting further jobs.
                    Thread.currentThread().interrupt();
                    LOG.error("JOB-NAME:" + jobName + ".........INTERRUPTED", e);
                    break;
                } catch (Exception e) {
                    // Boundary catch: one broken file must not abort the remaining imports.
                    LOG.error("JOB-NAME:" + jobName + ".........FAILED", e);
                }
            }
        } catch (IllegalStateException | IOException | URISyntaxException e) {
            LOG.error("失败", e);
        }
    }

    /**
     * Builds the import job for a single HDFS entry.
     *
     * @param conf base configuration; must already carry the HBase connection
     *             settings and {@link TableOutputFormat#OUTPUT_TABLE}
     * @param file status of the log file to import
     * @return a configured, not yet submitted job named after this class plus the file name
     * @throws IOException if the job cannot be created or configured
     */
    public static Job getJob(Configuration conf, FileStatus file) throws IOException {
        Path path = file.getPath();
        Job job = Job.getInstance(conf, ParseTomcatAccessLogToHBaseDriver.class.getSimpleName() + path.getName());
        // Both of the following lines are required when the job runs from a packaged jar.
        TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(ParseTomcatAccessLogToHBaseDriver.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TableOutputFormat.class);

        job.setMapperClass(ParseTomcatAccessLogToHBase.ImportMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(LogWritable.class);
        job.setReducerClass(ParseTomcatAccessLogToHBase.ImportReducer.class);

        FileInputFormat.setInputPaths(job, path);
        return job;
    }
}
2、MR
package com.sdnware.start04.hbase.log;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * MapReduce classes that copy Tomcat access-log lines from HDFS into HBase.
 *
 * @author chenb.bob
 * 2017-05-12
 */
public class ParseTomcatAccessLogToHBase {

    /**
     * Parses each input line into an {@link AccessLog} and emits it under the
     * key supplied by the input format.
     */
    public static class ImportMapper extends Mapper<LongWritable, Text, LongWritable, LogWritable> {
        /** Counter group used for import statistics. */
        private static final String COUNTER_GROUP = "TomcatAccessLog";
        /** Counter incremented for every line that could not be parsed. */
        private static final String COUNTER_MALFORMED = "MALFORMED_LINES";

        /**
         * Emits one {@link LogWritable} per parseable line.
         *
         * <p>{@code ParseUtils.parseLog} returns {@code null} for lines it cannot
         * parse. Such lines are counted and skipped, because writing a
         * {@code LogWritable} that wraps {@code null} throws an {@link IOException}
         * and would fail the whole task.
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, LongWritable, LogWritable>.Context context)
                throws IOException, InterruptedException {
            AccessLog parsed = ParseUtils.parseLog(value.toString());
            if (parsed == null) {
                context.getCounter(COUNTER_GROUP, COUNTER_MALFORMED).increment(1L);
                return;
            }
            context.write(key, new LogWritable(parsed));
        }
    }

    /**
     * Turns every received {@link AccessLog} into one HBase {@link Put} in the
     * {@code access} column family.
     */
    public static class ImportReducer extends TableReducer<LongWritable, LogWritable, NullWritable> {
        public static final String COLUMN_FAMILY = "access";
        public static final String COLUMN_NAME_CLIENTIP = "clientIP";
        public static final String COLUMN_NAME_DATETIME = "dateTime";
        public static final String COLUMN_NAME_REQUEST_PATH = "url";
        public static final String COLUMN_NAME_REQUEST_METHOD = "method";
        public static final String COLUMN_NAME_REQUEST_PROTOCOL = "protocol";
        public static final String COLUMN_NAME_RESPONSE_CODE = "status";
        public static final String COLUMN_NAME_REQUEST_BYTES = "bytes";

        // Encoded once instead of once per cell.
        private static final byte[] FAMILY = Bytes.toBytes(COLUMN_FAMILY);
        private static final byte[] Q_CLIENTIP = Bytes.toBytes(COLUMN_NAME_CLIENTIP);
        private static final byte[] Q_DATETIME = Bytes.toBytes(COLUMN_NAME_DATETIME);
        private static final byte[] Q_REQUEST_PATH = Bytes.toBytes(COLUMN_NAME_REQUEST_PATH);
        private static final byte[] Q_REQUEST_METHOD = Bytes.toBytes(COLUMN_NAME_REQUEST_METHOD);
        private static final byte[] Q_REQUEST_PROTOCOL = Bytes.toBytes(COLUMN_NAME_REQUEST_PROTOCOL);
        private static final byte[] Q_RESPONSE_CODE = Bytes.toBytes(COLUMN_NAME_RESPONSE_CODE);
        private static final byte[] Q_REQUEST_BYTES = Bytes.toBytes(COLUMN_NAME_REQUEST_BYTES);

        /**
         * Writes one {@link Put} per value; the row key is derived from the
         * entry's timestamp via {@code ParseUtils.getRowKey}.
         */
        @Override
        protected void reduce(LongWritable key, Iterable<LogWritable> values,
                Reducer<LongWritable, LogWritable, NullWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            for (LogWritable value : values) {
                AccessLog accessLog = value.getAccessLog();
                Put put = new Put(ParseUtils.getRowKey(accessLog.getDateTime()));
                addColumn(put, Q_CLIENTIP, accessLog.getClientIP());
                addColumn(put, Q_DATETIME, accessLog.getDateTime());
                addColumn(put, Q_REQUEST_PATH, accessLog.getRequestPath());
                addColumn(put, Q_REQUEST_METHOD, accessLog.getRequestMethod());
                addColumn(put, Q_REQUEST_PROTOCOL, accessLog.getRequestProtocol());
                addColumn(put, Q_RESPONSE_CODE, accessLog.getHttpStatusCode());
                addColumn(put, Q_REQUEST_BYTES, accessLog.getBytesSent());
                context.write(NullWritable.get(), put);
            }
        }

        /** Adds the cell unless the value is absent; encoding a null String would throw. */
        private static void addColumn(Put put, byte[] qualifier, String value) {
            if (value != null) {
                put.addColumn(FAMILY, qualifier, Bytes.toBytes(value));
            }
        }
    }
}
3、自定义Writable
package com.sdnware.start04.hbase.log;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.commons.lang.SerializationUtils;
import org.apache.hadoop.io.Writable;
/**
 * Hadoop {@link Writable} wrapper that carries one {@link AccessLog}.
 *
 * <p>On the wire the value is a 4-byte length followed by the bytes produced by
 * {@link SerializationUtils#serialize}.
 */
public class LogWritable implements Writable {

    /** The wrapped entry; stays {@code null} until set or read. */
    private AccessLog accessLog;

    /** No-argument constructor. */
    public LogWritable() {
    }

    /**
     * @param accessLog the entry to wrap
     */
    public LogWritable(AccessLog accessLog) {
        this.accessLog = accessLog;
    }

    public AccessLog getAccessLog() {
        return this.accessLog;
    }

    public void setAccessLog(AccessLog accessLog) {
        this.accessLog = accessLog;
    }

    /**
     * Writes the length-prefixed serialized form of the wrapped entry.
     *
     * @throws IOException if no entry is set or the underlying write fails
     */
    @Override
    public void write(DataOutput output) throws IOException {
        if (this.accessLog == null) {
            throw new IOException("日志对象为NULL");
        }
        final byte[] payload = SerializationUtils.serialize(this.accessLog);
        final int payloadLength = payload.length;
        output.writeInt(payloadLength);
        output.write(payload);
    }

    /**
     * Reads the length prefix, then exactly that many bytes, and restores the entry.
     */
    @Override
    public void readFields(DataInput input) throws IOException {
        final int payloadLength = input.readInt();
        final byte[] payload = new byte[payloadLength];
        input.readFully(payload);
        Object restored = SerializationUtils.deserialize(payload);
        this.accessLog = (AccessLog) restored;
    }
}
4、日志类
package com.sdnware.start04.hbase.log;
import java.io.Serializable;
/**
 * One parsed access-log entry. All fields are kept as plain strings.
 *
 * @author chenb.bob
 * 2017-05-12
 */
public class AccessLog implements Serializable {
    private static final long serialVersionUID = 1L;

    private String clientIP;
    private String dateTime;
    private String requestMethod;
    private String requestPath;
    private String requestProtocol;
    private String httpStatusCode;
    private String bytesSent;

    /** Creates an entry with every field unset ({@code null}). */
    public AccessLog() {
    }

    // ---- getters -------------------------------------------------------

    public String getClientIP() {
        return this.clientIP;
    }

    public String getDateTime() {
        return this.dateTime;
    }

    public String getRequestMethod() {
        return this.requestMethod;
    }

    public String getRequestPath() {
        return this.requestPath;
    }

    public String getRequestProtocol() {
        return this.requestProtocol;
    }

    public String getHttpStatusCode() {
        return this.httpStatusCode;
    }

    public String getBytesSent() {
        return this.bytesSent;
    }

    // ---- setters -------------------------------------------------------

    public void setClientIP(String clientIP) {
        this.clientIP = clientIP;
    }

    public void setDateTime(String dateTime) {
        this.dateTime = dateTime;
    }

    public void setRequestMethod(String requestMethod) {
        this.requestMethod = requestMethod;
    }

    public void setRequestPath(String requestPath) {
        this.requestPath = requestPath;
    }

    public void setRequestProtocol(String requestProtocol) {
        this.requestProtocol = requestProtocol;
    }

    public void setHttpStatusCode(String httpStatusCode) {
        this.httpStatusCode = httpStatusCode;
    }

    public void setBytesSent(String bytesSent) {
        this.bytesSent = bytesSent;
    }

    /** Renders every field as {@code name=value} inside {@code AccessLog [...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("AccessLog [");
        text.append("clientIP=").append(clientIP);
        text.append(", dateTime=").append(dateTime);
        text.append(", requestMethod=").append(requestMethod);
        text.append(", requestPath=").append(requestPath);
        text.append(", requestProtocol=").append(requestProtocol);
        text.append(", httpStatusCode=").append(httpStatusCode);
        text.append(", bytesSent=").append(bytesSent);
        return text.append("]").toString();
    }
}
5、工具类
package com.sdnware.start04.hbase.log;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Helpers for parsing Tomcat access-log lines and for deriving HBase row keys.
 *
 * <p>The public {@link SimpleDateFormat} constants are kept for compatibility.
 * {@code SimpleDateFormat} is not thread-safe, so every use inside this class
 * happens while holding the {@code ParseUtils.class} monitor.
 *
 * @author chenb.bob
 * 2017-05-12
 */
public class ParseUtils {
    private static final Logger LOG = LoggerFactory.getLogger(ParseUtils.class);

    public static final String LOGENTRYPATTERN = "^([\\d.]+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(.+?)\" (\\d{3}) (\\S+)?";
    public static final SimpleDateFormat FORMATTER = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.US);
    public static final SimpleDateFormat FORMATTE2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /** Compiled once; recompiling the expression for every log line is wasted work. */
    private static final Pattern LOG_ENTRY = Pattern.compile(LOGENTRYPATTERN);
    /** Separator between method, URL and protocol inside the quoted request. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Builds a row key of the form {@code <epoch millis><32 upper-case hex chars>}.
     *
     * <p>The timestamp may be in the raw access-log format
     * ({@code dd/MMM/yyyy:HH:mm:ss Z}) or in the display format produced by
     * {@link #parseTime(String)} ({@code yyyy-MM-dd HH:mm:ss}). {@link #parseLog(String)}
     * stores the latter in {@link AccessLog}, so accepting only the raw format
     * would make every key fall back to the current time.
     *
     * @param timestamp timestamp text in one of the two formats; may be {@code null}
     * @return the row key bytes; the current time is used as the prefix when the
     *         timestamp cannot be parsed
     */
    public static byte[] getRowKey(String timestamp) {
        long timeLong;
        try {
            timeLong = toEpochMillis(timestamp);
        } catch (ParseException e) {
            LOG.warn("Unparseable timestamp '{}', using current time for row key", timestamp);
            timeLong = System.currentTimeMillis();
        }
        // The random suffix keeps keys unique for entries that share a timestamp.
        String suffix = UUID.randomUUID().toString().replace("-", "").toUpperCase(Locale.ROOT);
        String rowKeyStr = Long.toString(timeLong) + suffix;
        return rowKeyStr.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Converts a raw access-log timestamp into {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param timestamp timestamp in {@code dd/MMM/yyyy:HH:mm:ss Z} format
     * @return the reformatted timestamp, or the input unchanged when it is
     *         {@code null} or cannot be parsed
     */
    public static String parseTime(String timestamp) {
        if (timestamp == null) {
            return null;
        }
        try {
            synchronized (ParseUtils.class) {
                return FORMATTE2.format(FORMATTER.parse(timestamp));
            }
        } catch (ParseException e) {
            LOG.error("时间转换错误", e);
            return timestamp;
        }
    }

    /**
     * Parses one access-log line.
     *
     * @param logEntryLine the raw line
     * @return the parsed entry, or {@code null} when the line is {@code null},
     *         does not match {@link #LOGENTRYPATTERN}, or parsing fails
     */
    public static AccessLog parseLog(String logEntryLine) {
        if (logEntryLine == null) {
            LOG.error("logEntryLine:{}", logEntryLine);
            return null;
        }
        try {
            Matcher matcher = LOG_ENTRY.matcher(logEntryLine);
            if (!matcher.matches()) {
                LOG.error("logEntryLine:{}", logEntryLine);
                return null;
            }
            String request = matcher.group(5);
            String sendBytes = matcher.group(7);

            AccessLog accessLog = new AccessLog();
            accessLog.setClientIP(matcher.group(1));
            accessLog.setDateTime(parseTime(matcher.group(4)));
            // Group 7 is optional in the pattern; store "" rather than null.
            accessLog.setBytesSent(sendBytes == null ? "" : sendBytes);
            accessLog.setHttpStatusCode(matcher.group(6));

            String[] requestSplit = WHITESPACE.split(request);
            if (requestSplit.length == 2 || requestSplit.length == 3) {
                accessLog.setRequestMethod(requestSplit[0]);
                accessLog.setRequestPath(requestSplit[1]);
                accessLog.setRequestProtocol(requestSplit.length == 3 ? requestSplit[2] : "");
            } else {
                // Unexpected shape: keep the whole request text as the path.
                accessLog.setRequestPath(request);
                accessLog.setRequestMethod("");
                accessLog.setRequestProtocol("");
            }
            return accessLog;
        } catch (Exception e) {
            LOG.error("logEntryLine:" + logEntryLine, e);
            return null;
        }
    }

    /** Parses a timestamp in either supported format into epoch milliseconds. */
    private static synchronized long toEpochMillis(String timestamp) throws ParseException {
        if (timestamp == null) {
            throw new ParseException("timestamp is null", 0);
        }
        try {
            return FORMATTER.parse(timestamp).getTime();
        } catch (ParseException notRawFormat) {
            return FORMATTE2.parse(timestamp).getTime();
        }
    }
}