日志分析demo

日志分析

在对日志进行分析的时候,恰巧写了两个不同的demo,记录一下:

代码块

1 初始版
@Component
public class LogMonitoring {

    @Autowired
    protected UnitSettingDao unitSettingDao;

    // 缓存unitSetting table中的 pid 和 cid;
    private static HashMap<String,String[]> unitPidCidMem = new HashMap<String, String[]>();

    public static void main(String[] args) {
        long startTime =System.currentTimeMillis();
        int code = 0;
        ClassPathXmlApplicationContext context = null;
        String inputPath = "/home/work/input.log";
        try {
            Log.infoLog(">>> start to prepare env.");
            context = new ClassPathXmlApplicationContext("classpath:applicationContext.xml");
            Log.infoLog(">>> start to process log.");
            LogMonitoring logMonitoring = context.getBean(LogMonitoring.class);
            code = logMonitoring.unitStypeMonitor(inputPath);
        } catch (Exception e) {
            e.printStackTrace();
            code = 1;
        } finally {
            context.close();
        }
        double processTime = (System.currentTimeMillis()-startTime)/1000;
        Log.infoLog("Total process time is :" + processTime);
        System.exit(code);
    }

    /**
     * unitsetting监控
     */
    public int unitStypeMonitor(String inputFilePath) throws IOException{
        FileReader fr = null;
        BufferedReader br = null;
        int value = 0 ;
        String line = null;
        String unitId;
        String userId;
        String provid;
        String cityid;
        String pid = null;
        String cid = null;
        boolean flag;
        Set<Long> unitIds = new HashSet<Long>();
        try{
            fr = new FileReader(inputFilePath);
            br = new BufferedReader(fr);
            while ((line = br.readLine()) != null) {
                String[] tempStr;
                if (StringUtils.isEmpty(line) || (tempStr = line.split("\\s+")).length < 8) {
                    continue;
                }
                unitId = tempStr[8].split(":|,")[1];
                userId = tempStr[6].split(":|,")[1];
                provid = tempStr[tempStr.length-2].split(":|,")[1];
                cityid = tempStr[tempStr.length-1].split(":")[1];
                unitIds.add(Long.parseLong(unitId));
                if(unitPidCidMem.containsKey(unitId)){
                    pid = unitPidCidMem.get(unitId)[0];
                    cid = unitPidCidMem.get(unitId)[1];
                    flag = compareId(provid,cityid,pid,cid);
                }else {
                    Map<Long,UnitSetting> TbFeedInterestPoMap = unitSettingDao.getByUnitids(Long.parseLong(userId),unitIds);
                    if(TbFeedInterestPoMap.containsKey(Long.parseLong(unitId))) {
                        pid = TbFeedInterestPoMap.get(Long.parseLong(unitId)).getPid();
                        cid = TbFeedInterestPoMap.get(Long.parseLong(unitId)).getCid();
                    }else{
                       Log.infoLog("unitId id is not in databases!");
                    }
                    if((StringUtils.isEmpty(pid)) && (StringUtils.isEmpty(cid))){
                        continue;   // 不限地域
                    }
                    flag = compareId(provid,cityid,pid,cid);
                    String[] pidCid = new String[2];
                    pidCid[0] = pid;
                    pidCid[1] = cid;
                    unitPidCidMem.put(unitId,pidCid);   //将pid,Cid加入缓存;
                    try {
                        Thread.sleep(1);          //sleep 1 mills,缓解数据库压力
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                }
                if(!flag){
                    Log.infoLog(userId + " error!  " + "line : "+ line);  //误报
                    value = 1;
                }
           }
        } catch (Exception e) {
            throw new RuntimeException("load file error:" + inputFilePath + ", line:" + line, e);
        } finally {
            try {
                if (br != null) {
                    br.close();
                }
                if (fr != null) {
                    fr.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return value;
    }
    /**
     * @param provid
     * @param cityid
     * @param pid:db中用','分割的多条pid;
     * @param cid:db中用','分割的多条cid;
     * @return
     */
    public boolean compareId(String provid,String cityid,String pid,String cid){
        if(StringUtils.isEmpty(pid) || StringUtils.isEmpty(cid)){
            return false;
        }
        if(pid.equals("0") ||(cid.equals("0"))){
            return true;
        }
        List<String> pidList = Arrays.asList(pid.split(","));
        List<String> cidList = Arrays.asList(cid.split(","));
        if(pidList.contains(provid) || cidList.contains(cityid)){
           return true;
        }
        if(pid.equals("0")&&cidList.contains(cityid)){
            return true;
        }
        return false;
    }
}
2 优化版
package com.baidu.fengchao.sirius.scripts.cases;
import com.baidu.fengchao.sirius.orm.dao.UnitSettingDao;
import com.baidu.fengchao.sirius.orm.po.UnitSetting;
import com.baidu.fengchao.sirius.scripts.Log;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import java.io.*;
import java.util.*;
/**
 * @author liuchaoqun01
 * @version 1.0
 */
@Component
public class LogMonitoring {

    @Autowired
    protected UnitSettingDao unitSettingDao;

    private static HashMap<Long, String[]> unitPidCidMem = new HashMap<Long, String[]>();  //缓存unitSetting table中的pid和cid;

    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();
        int code;
        ClassPathXmlApplicationContext context = null;
        String inputPath = "/home/work/liuchaoqun01/feedclk.log";
        try {
            Log.infoLog(">>> start to prepare env.");
            context = new ClassPathXmlApplicationContext("classpath:applicationContext.xml");
            Log.infoLog(">>> start to process log.");
            LogMonitoring logMonitoring = context.getBean(LogMonitoring.class);
            code = logMonitoring.unitStypeMonitor(inputPath);
        } catch (Exception e) {
            e.printStackTrace();
            code = 1;
        } finally {
            context.close();
        }
        double processTime = (System.currentTimeMillis() - startTime) / (1000 * 1.0);
        Log.infoLog("Total process time is :" + processTime);
        System.exit(code);
    }

    /**
     * unitsetting监控
     */
    public int unitStypeMonitor(String inputFilePath) throws IOException {
        FileReader fr = null;
        BufferedReader br = null;
        int value = 0;
        String line = null;
        String unitId;
        String userId;
        String provid;
        String cityid;
        int num = 0;
        int lineNumber = 0;
        String pid;
        String cid;
        boolean flag;
        HashMap<String, Set<Long>> unitIdsMap = new HashMap<String, Set<Long>>();        // 需要查库的unitIdsMap;
        HashMap<Integer, String> logData = new HashMap<Integer, String>();              // 保存log<num,line>
        HashMap<Integer, String[]> infoData = new HashMap<Integer, String[]>();         // 保存log<pid,cid>
        try {
            fr = new FileReader(inputFilePath);
            br = new BufferedReader(fr);
            while ((line = br.readLine()) != null) {
                ++lineNumber;
                String[] tempStr = line.split("\\s+");
                if (StringUtils.isEmpty(line) || (tempStr.length < 10) || (tempStr.length > 18)) {  //过滤无效log和打错的log
                    continue;
                }
                unitId = tempStr[8].split(":|,")[1];
                userId = tempStr[6].split(":|,")[1];
                provid = tempStr[tempStr.length - 2].split(":|,")[1];
                cityid = tempStr[tempStr.length - 1].split(":")[1];
                //如果在内存中,直接比较;
                if (unitPidCidMem.containsKey(unitId)) {
                    pid = unitPidCidMem.get(unitId)[0];
                    cid = unitPidCidMem.get(unitId)[1];
                    flag = compareId(provid, cityid, pid, cid);
                    if (!flag) {
                        Log.infoLog(userId + " error!  " + "line : " + line);  //误报
                        value = 1;
                    }
                } else { // 累计,当到达100条时进行读库并更新内存;
                    logData.put(lineNumber, line);  // 镜像将要处理的log数据;
                    String[] infoString = new String[3];
                    infoString[0] = provid;
                    infoString[1] = cityid;
                    infoString[2] = unitId;
                    infoData.put(lineNumber, infoString);
                    if (!unitIdsMap.containsKey(userId)) {
                        Set<Long> unitIdSet = new HashSet<Long>();
                        unitIdSet.add(Long.parseLong(unitId));
                        unitIdsMap.put(userId, unitIdSet);
                    } else {
                        Set<Long> unitIdSet = unitIdsMap.get(userId);
                        unitIdSet.add(Long.parseLong(unitId));
                        unitIdsMap.put(userId, unitIdSet);
                    }
                    ++num;
                    if (num == 100) {
                        // 查库&&加入内存,数据重新初始化;
                        if(!batchProcess(unitIdsMap,logData,infoData)) {
                            value = 1;
                        }
                        num = 0;
                        unitIdsMap = new HashMap<String, Set<Long>>();
                        logData = new HashMap<Integer, String>();
                        infoData = new HashMap<Integer, String[]>();
                    } else {
                        continue;
                    }
                }
            }
        } catch (Exception e) {
            throw new RuntimeException("load file error:" + inputFilePath + ", line:" + line, e);
        } finally {
            try {
                if (br != null) {
                    br.close();
                }
                if (fr != null) {
                    fr.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return value;
    }

    /**
     * @param provid
     * @param cityid
     * @param pid:db中用','分割的多条pid;
     * @param cid:db中用','分割的多条cid;
     * @return
     */
    public boolean compareId(String provid, String cityid, String pid, String cid) {
        if(StringUtils.isEmpty(pid) && StringUtils.isEmpty(cid)){
            return true;
        }
        if (pid.equals("0") && (cid.equals("0"))) {
            return true;
        }
        if (StringUtils.isEmpty(pid) || StringUtils.isEmpty(cid)) {
            return false;
        }
        List<String> pidList = Arrays.asList(pid.split(","));
        List<String> cidList = Arrays.asList(cid.split(","));
        if (pidList.contains(provid) || cidList.contains(cityid)) {
            return true;
        }
        if (pid.equals("0") && cidList.contains(cityid)) {
            return true;
        }
        return false;
    }

    /**
     * 1 每100条进行读库操作 2 保证传过来的数据内存中是没有的,都是需要进行查库操作的;
     *
     * @param
     */
    public boolean batchProcess(HashMap<String, Set<Long>> unitIdsMap, HashMap<Integer, String> logData,
                                HashMap<Integer, String[]> infoData) {
        boolean flag = true;   //标记位;
        boolean code;
        Long unitId;
        for (String user : unitIdsMap.keySet()) {
            Set<Long> unitIds = unitIdsMap.get(user);
            Map<Long, UnitSetting> TbFeedInterestPoMap = unitSettingDao.getByUnitids(Long.parseLong(user), unitIds);
            try {          //sleep 1 mills,缓解数据库压力
                Thread.sleep(1);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            for (Long unit : TbFeedInterestPoMap.keySet()) {
                if (!unitPidCidMem.containsKey(unit)) {
                    String[] pidCid = new String[2];
                    pidCid[0] = TbFeedInterestPoMap.get(unit).getPid();
                    pidCid[1] = TbFeedInterestPoMap.get(unit).getCid();
                    unitPidCidMem.put(unit, pidCid);
                }
            }
        }
        for (Integer lineNumber : infoData.keySet()) {
            String[] infoStr = infoData.get(lineNumber);
            unitId = Long.parseLong(infoStr[2]);
            if(unitPidCidMem.containsKey(unitId)) {
                String[] pidCid = unitPidCidMem.get(unitId);
                code = compareId(infoStr[0], infoStr[1], pidCid[0], pidCid[1]);
                if (!code) {
                    Log.infoLog( "lineNumber:"+ lineNumber + "error!" +"     :" + logData.get(lineNumber));  //误报
                    flag = false;
                }
            }else {
                Log.infoLog("There is bug in code : com.baidu.fengchao.sirius.scripts.cases.LogMonitoring");
            }
        }
        return flag;
    }
}
3 小结:

数据:145M
方法1:运行时间1.4h
方法2:运行时间30min
共同点与区别:两种方法在比较时都使用了缓存。区别在于:方法1在每次缓存未命中时都立即单独访问一次数据库,因此会产生大量数据库访问的耗时,有一定的局限性;方法2则是依次读取日志,当缓存未命中的记录累计到100条后,再统一查库并更新缓存,从而减少了数据库访问次数。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值