数据迁移

将tomcat-access.log文件导入到HBase中
本次实验使用伪分布式模式;由于机器内存较少,每次只提交一个日志文件对应的MapReduce作业,将文件逐个导入HBase

1、DRIVER

package com.sdnware.start04.hbase.log;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.LinkedList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
//import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Driver that imports Tomcat access-log files from HDFS into the HBase table
 * {@code sdnware:tomcat_log}.
 *
 * <p>One MapReduce job is built per entry listed under {@code /tomcat/access_log};
 * the jobs are submitted sequentially and each one is waited for before the next
 * is started.
 */
public class ParseTomcatAccessLogToHBaseDriver {

    private static final Logger LOG = LoggerFactory.getLogger(ParseTomcatAccessLogToHBaseDriver.class);

    /**
     * Lists the access-log directory and runs one import job per listed entry.
     *
     * <p>A job that throws or reports failure is logged and the loop moves on to
     * the next entry. If the waiting thread is interrupted, the interrupt flag is
     * restored and no further jobs are submitted.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args){

        try {
            //System.setProperty("hadoop.home.dir", "E:/soft/hadoop-2.6.0");

            Configuration conf = new Configuration();
            conf.set(TableOutputFormat.OUTPUT_TABLE, "sdnware:tomcat_log");// name of the HBase table to write to

            // NOTE(review): hbase.rootdir names a different host than the ZooKeeper
            // quorum / master settings and the HDFS URI below - confirm this is intended.
            conf.set("hbase.zookeeper.quorum", "192.168.100.205");
            conf.set("hbase.zookeeper.property.clientPort", "2181");// ZooKeeper client port
            conf.set("hbase.master", "192.168.100.205:60000");
            conf.set("hbase.rootdir","hdfs://192.168.100.221:9000/hbase");

            FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop.sdnware.com:9000"),conf);

            FileStatus[] files = fileSystem.listStatus(new Path("/tomcat/access_log"));

            for (FileStatus file : files) {
                String jobName = "";
                try {
                    Job job = getJob(conf, file);
                    jobName = job.getJobName();
                    LOG.info("JOB-NAME:"+jobName+".........START");
                    // waitForCompletion returns false when the job itself failed;
                    // only report END for a job that actually succeeded.
                    if (job.waitForCompletion(true)) {
                        LOG.info("JOB-NAME:"+jobName+".........END");
                    } else {
                        LOG.error("JOB-NAME:"+jobName+".........FAILED (input "+file.getPath()+")");
                    }
                } catch (InterruptedException e) {
                    // Restore the flag so callers/shutdown logic can see the interrupt,
                    // and stop submitting further jobs.
                    Thread.currentThread().interrupt();
                    LOG.error("JOB-NAME:"+jobName+".........INTERRUPTED (input "+file.getPath()+")",e);
                    break;
                } catch (Exception e) {
                    // One broken input must not prevent the remaining files from being imported.
                    LOG.error("JOB-NAME:"+jobName+".........FAILED (input "+file.getPath()+")",e);
                }
            }

        } catch (IllegalStateException | IOException | URISyntaxException e) {
            LOG.error("失败",e);
        }
    }

    /**
     * Builds the import job for a single input entry.
     *
     * <p>The job reads the entry with {@link TextInputFormat}, maps each line to a
     * {@code LogWritable} keyed by the line's {@link LongWritable} key, and writes
     * the reducer output through {@link TableOutputFormat}. The job name is this
     * class's simple name followed by the input's file name.
     *
     * @param conf base configuration; must already carry the output table and HBase settings
     * @param file status of the HDFS entry to use as the job input
     * @return the configured, not yet submitted job
     * @throws IOException if the job cannot be created or its dependency jars cannot be added
     */
    public static Job getJob(Configuration conf, FileStatus file) throws IOException {
        Path path = file.getPath();

        Job job = Job.getInstance(conf, ParseTomcatAccessLogToHBaseDriver.class.getSimpleName()+path.getName());
        // These two calls are required when the job is run from a packaged jar.
        TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(ParseTomcatAccessLogToHBaseDriver.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TableOutputFormat.class);

        job.setMapperClass(ParseTomcatAccessLogToHBase.ImportMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(LogWritable.class);

        job.setReducerClass(ParseTomcatAccessLogToHBase.ImportReducer.class);

        FileInputFormat.setInputPaths(job, path);

        return job;
    }
}

2、MR

package com.sdnware.start04.hbase.log;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * MapReduce mapper and reducer that store Tomcat access-log records in HBase.
 * The input is read from Hadoop HDFS.
 *
 * @author chenb.bob
 * 2017-05-12
 */
public class ParseTomcatAccessLogToHBase {

    /**
     * Parses each input line into an {@code AccessLog} and emits it, wrapped in a
     * {@code LogWritable}, under the unchanged input key.
     */
    public static class ImportMapper extends Mapper<LongWritable, Text, LongWritable, LogWritable>{

        /** Counter group used to report lines that could not be parsed. */
        private static final String COUNTER_GROUP = "TomcatAccessLog";

        /** Counter incremented once per skipped (unparseable) line. */
        private static final String COUNTER_MALFORMED = "MALFORMED_LINES";

        /**
         * Maps one text line to one log record.
         *
         * <p>{@code ParseUtils.parseLog} returns {@code null} for a line it cannot
         * parse. Such lines are counted and skipped: emitting a {@code LogWritable}
         * that holds {@code null} would make its {@code write} throw and fail the task.
         *
         * @param key     input key, forwarded as the map output key
         * @param value   one line of the access log
         * @param context MapReduce context used to emit the record and bump counters
         * @throws IOException          if emitting the record fails
         * @throws InterruptedException if emitting the record is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, LongWritable, LogWritable>.Context context)
                throws IOException, InterruptedException {
            AccessLog parsed = ParseUtils.parseLog(value.toString());
            if (parsed == null) {
                context.getCounter(COUNTER_GROUP, COUNTER_MALFORMED).increment(1L);
                return;
            }
            context.write(key, new LogWritable(parsed));
        }

    }

    /**
     * Turns every received log record into one HBase {@link Put} in the
     * {@code access} column family.
     */
    public static class ImportReducer extends TableReducer<LongWritable, LogWritable, NullWritable>{

        public static final String COLUMN_FAMILY = "access";
        public static final String COLUMN_NAME_CLIENTIP = "clientIP";
        public static final String COLUMN_NAME_DATETIME = "dateTime";
        public static final String COLUMN_NAME_REQUEST_PATH = "url";
        public static final String COLUMN_NAME_REQUEST_METHOD = "method";
        public static final String COLUMN_NAME_REQUEST_PROTOCOL = "protocol";
        public static final String COLUMN_NAME_RESPONSE_CODE = "status";
        public static final String COLUMN_NAME_REQUEST_BYTES = "bytes";

        /** Column family encoded once instead of once per cell. */
        private static final byte[] FAMILY = Bytes.toBytes(COLUMN_FAMILY);

        /**
         * Writes one {@link Put} per record in {@code values}.
         *
         * <p>The row key comes from {@code ParseUtils.getRowKey} applied to the
         * record's date-time string. Fields whose value is {@code null} are left
         * out of the row instead of being passed to {@code Bytes.toBytes}.
         *
         * @param key     map output key of the group (not used for the row key)
         * @param values  log records of the group
         * @param context MapReduce context used to emit the mutations
         * @throws IOException          if emitting a mutation fails
         * @throws InterruptedException if emitting a mutation is interrupted
         */
        @Override
        protected void reduce(LongWritable key, Iterable<LogWritable> values,
                Reducer<LongWritable, LogWritable, NullWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            for (LogWritable record : values) {
                AccessLog accessLog = record.getAccessLog();
                Put put = new Put(ParseUtils.getRowKey(accessLog.getDateTime()));

                addColumn(put, COLUMN_NAME_CLIENTIP, accessLog.getClientIP());
                addColumn(put, COLUMN_NAME_DATETIME, accessLog.getDateTime());
                addColumn(put, COLUMN_NAME_REQUEST_PATH, accessLog.getRequestPath());
                addColumn(put, COLUMN_NAME_REQUEST_METHOD, accessLog.getRequestMethod());
                addColumn(put, COLUMN_NAME_REQUEST_PROTOCOL, accessLog.getRequestProtocol());
                addColumn(put, COLUMN_NAME_RESPONSE_CODE, accessLog.getHttpStatusCode());
                addColumn(put, COLUMN_NAME_REQUEST_BYTES, accessLog.getBytesSent());

                context.write(NullWritable.get(), put);
            }
        }

        /** Adds {@code value} under {@code qualifier} in the access family; a null value is skipped. */
        private static void addColumn(Put put, String qualifier, String value) {
            if (value != null) {
                put.addColumn(FAMILY, Bytes.toBytes(qualifier), Bytes.toBytes(value));
            }
        }
    }
}

3、自定义Writable

package com.sdnware.start04.hbase.log;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.commons.lang.SerializationUtils;
import org.apache.hadoop.io.Writable;

/**
 * Hadoop {@link Writable} that carries a single {@link AccessLog}.
 *
 * <p>On the wire the record is an {@code int} length followed by that many bytes
 * produced by {@link SerializationUtils#serialize}.
 */
public class LogWritable implements Writable {

    /** The wrapped log record; {@code null} until set or read. */
    private AccessLog accessLog;

    /** No-argument constructor; the record is filled in later by {@link #readFields} or the setter. */
    public LogWritable(){}

    /**
     * Wraps an existing record.
     *
     * @param accessLog the record to carry
     */
    public LogWritable(AccessLog accessLog){
        this.accessLog = accessLog;
    }

    /**
     * Reads the length prefix, then the payload, and deserializes it into the wrapped record.
     *
     * @param input stream to read from
     * @throws IOException if reading from {@code input} fails
     */
    @Override
    public void readFields(DataInput input) throws IOException {
        final int length = input.readInt();
        final byte[] payload = new byte[length];
        input.readFully(payload);
        this.accessLog = (AccessLog) SerializationUtils.deserialize(payload);
    }

    /**
     * Serializes the wrapped record and writes it as length prefix plus payload.
     *
     * @param output stream to write to
     * @throws IOException if no record is set, or if writing to {@code output} fails
     */
    @Override
    public void write(DataOutput output) throws IOException {
        if (this.accessLog != null) {
            final byte[] payload = SerializationUtils.serialize(this.accessLog);
            output.writeInt(payload.length);
            output.write(payload);
            return;
        }
        throw new IOException("日志对象为NULL");
    }

    /** @return the wrapped record, possibly {@code null} */
    public AccessLog getAccessLog() {
        return this.accessLog;
    }

    /** @param accessLog the record to carry from now on */
    public void setAccessLog(AccessLog accessLog) {
        this.accessLog = accessLog;
    }

}

4、日志类

package com.sdnware.start04.hbase.log;

import java.io.Serializable;

/**
 * One access-log entry. All fields are kept as plain strings.
 *
 * @author chenb.bob
 * 2017-05-12
 */
public class AccessLog implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Client IP address. */
    private String clientIP;

    /** Date and time of the request, as text. */
    private String dateTime;

    /** Request method. */
    private String requestMethod;

    /** Request path. */
    private String requestPath;

    /** Request protocol. */
    private String requestProtocol;

    /** HTTP status code, as text. */
    private String httpStatusCode;

    /** Bytes-sent value, as text. */
    private String bytesSent;

    /** Creates an entry with every field unset ({@code null}). */
    public AccessLog() {
    }

    public String getClientIP() {
        return this.clientIP;
    }

    public void setClientIP(String clientIP) {
        this.clientIP = clientIP;
    }

    public String getDateTime() {
        return this.dateTime;
    }

    public void setDateTime(String dateTime) {
        this.dateTime = dateTime;
    }

    public String getRequestMethod() {
        return this.requestMethod;
    }

    public void setRequestMethod(String requestMethod) {
        this.requestMethod = requestMethod;
    }

    public String getRequestPath() {
        return this.requestPath;
    }

    public void setRequestPath(String requestPath) {
        this.requestPath = requestPath;
    }

    public String getRequestProtocol() {
        return this.requestProtocol;
    }

    public void setRequestProtocol(String requestProtocol) {
        this.requestProtocol = requestProtocol;
    }

    public String getHttpStatusCode() {
        return this.httpStatusCode;
    }

    public void setHttpStatusCode(String httpStatusCode) {
        this.httpStatusCode = httpStatusCode;
    }

    public String getBytesSent() {
        return this.bytesSent;
    }

    public void setBytesSent(String bytesSent) {
        this.bytesSent = bytesSent;
    }

    /**
     * Renders every field as {@code name=value}, comma-separated, inside
     * {@code AccessLog [...]}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("AccessLog [");
        text.append("clientIP=").append(this.clientIP);
        text.append(", dateTime=").append(this.dateTime);
        text.append(", requestMethod=").append(this.requestMethod);
        text.append(", requestPath=").append(this.requestPath);
        text.append(", requestProtocol=").append(this.requestProtocol);
        text.append(", httpStatusCode=").append(this.httpStatusCode);
        text.append(", bytesSent=").append(this.bytesSent);
        return text.append("]").toString();
    }
}

5、工具类

package com.sdnware.start04.hbase.log;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Helpers for parsing access-log lines and for building HBase row keys.
 *
 * @author chenb.bob
 * 2017-05-12
 */
public class ParseUtils {

    private static final Logger LOG = LoggerFactory.getLogger(ParseUtils.class);

    /**
     * Regular expression for one log line. Capture groups: 1 = client IP,
     * 2 and 3 = two non-blank tokens (not used by {@link #parseLog}),
     * 4 = bracketed timestamp, 5 = quoted request line, 6 = three-digit status,
     * 7 = optional trailing token (stored as bytes sent).
     */
    public static final String LOGENTRYPATTERN = "^([\\d.]+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(.+?)\" (\\d{3}) (\\S+)?";

    /** Timestamp layout as it appears in the log line. Not thread-safe; see {@link #DATE_LOCK}. */
    public static final SimpleDateFormat FORMATTER = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.US);

    /** Timestamp layout produced by {@link #parseTime}. Not thread-safe; see {@link #DATE_LOCK}. */
    public static final SimpleDateFormat FORMATTE2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /** Compiled once; the pattern text never changes. */
    private static final Pattern LOG_ENTRY = Pattern.compile(LOGENTRYPATTERN);

    /**
     * Guards every use of the two shared formatters inside this class, because
     * {@link SimpleDateFormat} instances must not be used concurrently.
     */
    private static final Object DATE_LOCK = new Object();

    /**
     * Builds a row key: the timestamp in epoch milliseconds followed by a random
     * UUID (dashes removed, upper case).
     *
     * <p>The timestamp is accepted in either layout: the raw log layout
     * ({@link #FORMATTER}) or the layout produced by {@link #parseTime}
     * ({@link #FORMATTE2}). Previously only the raw layout was tried, so a value
     * that had already gone through {@link #parseTime} always fell back to the
     * current time. The current time is still used when the value is {@code null}
     * or matches neither layout.
     *
     * @param timestamp timestamp text in one of the two layouts
     * @return the row key bytes
     */
    public static byte[] getRowKey(String timestamp){
        long timeLong = toEpochMillis(timestamp);
        String suffix = UUID.randomUUID().toString().replace("-", "").toUpperCase(Locale.ROOT);
        String rowKeyStr = Long.toString(timeLong) + suffix;
        // Explicit charset so the key bytes do not depend on the platform default.
        return rowKeyStr.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    /** Parses {@code timestamp} with either known layout; falls back to the current time. */
    private static long toEpochMillis(String timestamp) {
        if (timestamp != null) {
            synchronized (DATE_LOCK) {
                try {
                    return FORMATTER.parse(timestamp).getTime();
                } catch (ParseException ignored) {
                    // Not the raw log layout; try the reformatted layout next.
                }
                try {
                    // NOTE(review): this layout carries no zone, so it is read in the
                    // JVM default time zone - confirm writer and reader share one.
                    return FORMATTE2.parse(timestamp).getTime();
                } catch (ParseException ignored) {
                    // Neither layout matched; handled by the fallback below.
                }
            }
        }
        LOG.warn("Unparseable timestamp for row key, using current time: {}", timestamp);
        return System.currentTimeMillis();
    }

    /**
     * Converts a timestamp from the raw log layout to {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param timestamp timestamp text in the raw log layout
     * @return the reformatted timestamp, or {@code timestamp} unchanged if it cannot be parsed
     */
    public static String parseTime(String timestamp){

        try {
            synchronized (DATE_LOCK) {
                return FORMATTE2.format(FORMATTER.parse(timestamp));
            }
        } catch (ParseException e) {
            LOG.error("时间转换错误", e);
            return timestamp;
        }
    }

    /**
     * Parses one log line into an {@link AccessLog}.
     *
     * <p>The quoted request line is split on whitespace: two tokens are taken as
     * method and path, three tokens as method, path and protocol; any other
     * token count stores the whole request line as the path with empty method
     * and protocol.
     *
     * @param logEntryLine one line of the access log
     * @return the parsed entry, or {@code null} if the line does not match
     *         {@link #LOGENTRYPATTERN} or parsing fails
     */
    public static AccessLog parseLog(String logEntryLine) {
        try {
            Matcher matcher = LOG_ENTRY.matcher(logEntryLine);
            if (!matcher.matches()) {
                LOG.error("logEntryLine:" + logEntryLine);
                return null;
            }

            AccessLog accessLog = new AccessLog();
            accessLog.setClientIP(matcher.group(1));
            accessLog.setDateTime(parseTime(matcher.group(4)));
            accessLog.setHttpStatusCode(matcher.group(6));
            accessLog.setBytesSent(matcher.group(7));

            String request = matcher.group(5);
            String[] requestSplit = request.split("\\s+");

            if (requestSplit.length == 2 || requestSplit.length == 3) {
                accessLog.setRequestMethod(requestSplit[0]);
                accessLog.setRequestPath(requestSplit[1]);
                accessLog.setRequestProtocol(requestSplit.length == 3 ? requestSplit[2] : "");
            } else {
                accessLog.setRequestPath(request);
                accessLog.setRequestMethod("");
                accessLog.setRequestProtocol("");
            }
            return accessLog;
        } catch (Exception e) {
            // Keep the best-effort contract (null on failure) but record the cause.
            LOG.error("logEntryLine:" + logEntryLine, e);
            return null;
        }
    }
}
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值