日志分析
在对日志进行分析的时候,恰巧写了两个不同的demo,记录一下:
代码块
1 初始版
@Component
public class LogMonitoring {
@Autowired
protected UnitSettingDao unitSettingDao;
// 缓存unitSetting table中的 pid 和 cid;
private static HashMap<String,String[]> unitPidCidMem = new HashMap<String, String[]>();
public static void main(String[] args) {
long startTime =System.currentTimeMillis();
int code = 0;
ClassPathXmlApplicationContext context = null;
String inputPath = "/home/work/input.log";
try {
Log.infoLog(">>> start to prepare env.");
context = new ClassPathXmlApplicationContext("classpath:applicationContext.xml");
Log.infoLog(">>> start to process log.");
LogMonitoring logMonitoring = context.getBean(LogMonitoring.class);
code = logMonitoring.unitStypeMonitor(inputPath);
} catch (Exception e) {
e.printStackTrace();
code = 1;
} finally {
context.close();
}
double processTime = (System.currentTimeMillis()-startTime)/1000;
Log.infoLog("Total process time is :" + processTime);
System.exit(code);
}
/**
* unitsetting监控
*/
public int unitStypeMonitor(String inputFilePath) throws IOException{
FileReader fr = null;
BufferedReader br = null;
int value = 0 ;
String line = null;
String unitId;
String userId;
String provid;
String cityid;
String pid = null;
String cid = null;
boolean flag;
Set<Long> unitIds = new HashSet<Long>();
try{
fr = new FileReader(inputFilePath);
br = new BufferedReader(fr);
while ((line = br.readLine()) != null) {
String[] tempStr;
if (StringUtils.isEmpty(line) || (tempStr = line.split("\\s+")).length < 8) {
continue;
}
unitId = tempStr[8].split(":|,")[1];
userId = tempStr[6].split(":|,")[1];
provid = tempStr[tempStr.length-2].split(":|,")[1];
cityid = tempStr[tempStr.length-1].split(":")[1];
unitIds.add(Long.parseLong(unitId));
if(unitPidCidMem.containsKey(unitId)){
pid = unitPidCidMem.get(unitId)[0];
cid = unitPidCidMem.get(unitId)[1];
flag = compareId(provid,cityid,pid,cid);
}else {
Map<Long,UnitSetting> TbFeedInterestPoMap = unitSettingDao.getByUnitids(Long.parseLong(userId),unitIds);
if(TbFeedInterestPoMap.containsKey(Long.parseLong(unitId))) {
pid = TbFeedInterestPoMap.get(Long.parseLong(unitId)).getPid();
cid = TbFeedInterestPoMap.get(Long.parseLong(unitId)).getCid();
}else{
Log.infoLog("unitId id is not in databases!");
}
if((StringUtils.isEmpty(pid)) && (StringUtils.isEmpty(cid))){
continue; // 不限地域
}
flag = compareId(provid,cityid,pid,cid);
String[] pidCid = new String[2];
pidCid[0] = pid;
pidCid[1] = cid;
unitPidCidMem.put(unitId,pidCid); //将pid,Cid加入缓存;
try {
Thread.sleep(1); //sleep 1 mills,缓解数据库压力
} catch (InterruptedException e) {
e.printStackTrace();
}
}
if(!flag){
Log.infoLog(userId + " error! " + "line : "+ line); //误报
value = 1;
}
}
} catch (Exception e) {
throw new RuntimeException("load file error:" + inputFilePath + ", line:" + line, e);
} finally {
try {
if (br != null) {
br.close();
}
if (fr != null) {
fr.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}
return value;
}
/**
* @param provid
* @param cityid
* @param pid:db中用','分割的多条pid;
* @param cid:db中用','分割的多条cid;
* @return
*/
public boolean compareId(String provid,String cityid,String pid,String cid){
if(StringUtils.isEmpty(pid) || StringUtils.isEmpty(cid)){
return false;
}
if(pid.equals("0") ||(cid.equals("0"))){
return true;
}
List<String> pidList = Arrays.asList(pid.split(","));
List<String> cidList = Arrays.asList(cid.split(","));
if(pidList.contains(provid) || cidList.contains(cityid)){
return true;
}
if(pid.equals("0")&&cidList.contains(cityid)){
return true;
}
return false;
}
}
2 优化版
package com.baidu.fengchao.sirius.scripts.cases;
import com.baidu.fengchao.sirius.orm.dao.UnitSettingDao;
import com.baidu.fengchao.sirius.orm.po.UnitSetting;
import com.baidu.fengchao.sirius.scripts.Log;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import java.io.*;
import java.util.*;
/**
* @author liuchaoqun01
* @version 1.0
*/
@Component
public class LogMonitoring {
@Autowired
protected UnitSettingDao unitSettingDao;
private static HashMap<Long, String[]> unitPidCidMem = new HashMap<Long, String[]>(); //缓存unitSetting table中的pid和cid;
public static void main(String[] args) {
long startTime = System.currentTimeMillis();
int code;
ClassPathXmlApplicationContext context = null;
String inputPath = "/home/work/liuchaoqun01/feedclk.log";
try {
Log.infoLog(">>> start to prepare env.");
context = new ClassPathXmlApplicationContext("classpath:applicationContext.xml");
Log.infoLog(">>> start to process log.");
LogMonitoring logMonitoring = context.getBean(LogMonitoring.class);
code = logMonitoring.unitStypeMonitor(inputPath);
} catch (Exception e) {
e.printStackTrace();
code = 1;
} finally {
context.close();
}
double processTime = (System.currentTimeMillis() - startTime) / (1000 * 1.0);
Log.infoLog("Total process time is :" + processTime);
System.exit(code);
}
/**
* unitsetting监控
*/
public int unitStypeMonitor(String inputFilePath) throws IOException {
FileReader fr = null;
BufferedReader br = null;
int value = 0;
String line = null;
String unitId;
String userId;
String provid;
String cityid;
int num = 0;
int lineNumber = 0;
String pid;
String cid;
boolean flag;
HashMap<String, Set<Long>> unitIdsMap = new HashMap<String, Set<Long>>(); // 需要查库的unitIdsMap;
HashMap<Integer, String> logData = new HashMap<Integer, String>(); // 保存log<num,line>
HashMap<Integer, String[]> infoData = new HashMap<Integer, String[]>(); // 保存log<pid,cid>
try {
fr = new FileReader(inputFilePath);
br = new BufferedReader(fr);
while ((line = br.readLine()) != null) {
++lineNumber;
String[] tempStr = line.split("\\s+");
if (StringUtils.isEmpty(line) || (tempStr.length < 10) || (tempStr.length > 18)) { //过滤无效log和打错的log
continue;
}
unitId = tempStr[8].split(":|,")[1];
userId = tempStr[6].split(":|,")[1];
provid = tempStr[tempStr.length - 2].split(":|,")[1];
cityid = tempStr[tempStr.length - 1].split(":")[1];
//如果在内存中,直接比较;
if (unitPidCidMem.containsKey(unitId)) {
pid = unitPidCidMem.get(unitId)[0];
cid = unitPidCidMem.get(unitId)[1];
flag = compareId(provid, cityid, pid, cid);
if (!flag) {
Log.infoLog(userId + " error! " + "line : " + line); //误报
value = 1;
}
} else { // 累计,当到达100条时进行读库并更新内存;
logData.put(lineNumber, line); // 镜像将要处理的log数据;
String[] infoString = new String[3];
infoString[0] = provid;
infoString[1] = cityid;
infoString[2] = unitId;
infoData.put(lineNumber, infoString);
if (!unitIdsMap.containsKey(userId)) {
Set<Long> unitIdSet = new HashSet<Long>();
unitIdSet.add(Long.parseLong(unitId));
unitIdsMap.put(userId, unitIdSet);
} else {
Set<Long> unitIdSet = unitIdsMap.get(userId);
unitIdSet.add(Long.parseLong(unitId));
unitIdsMap.put(userId, unitIdSet);
}
++num;
if (num == 100) {
// 查库&&加入内存,数据重新初始化;
if(!batchProcess(unitIdsMap,logData,infoData)) {
value = 1;
}
num = 0;
unitIdsMap = new HashMap<String, Set<Long>>();
logData = new HashMap<Integer, String>();
infoData = new HashMap<Integer, String[]>();
} else {
continue;
}
}
}
} catch (Exception e) {
throw new RuntimeException("load file error:" + inputFilePath + ", line:" + line, e);
} finally {
try {
if (br != null) {
br.close();
}
if (fr != null) {
fr.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}
return value;
}
/**
* @param provid
* @param cityid
* @param pid:db中用','分割的多条pid;
* @param cid:db中用','分割的多条cid;
* @return
*/
public boolean compareId(String provid, String cityid, String pid, String cid) {
if(StringUtils.isEmpty(pid) && StringUtils.isEmpty(cid)){
return true;
}
if (pid.equals("0") && (cid.equals("0"))) {
return true;
}
if (StringUtils.isEmpty(pid) || StringUtils.isEmpty(cid)) {
return false;
}
List<String> pidList = Arrays.asList(pid.split(","));
List<String> cidList = Arrays.asList(cid.split(","));
if (pidList.contains(provid) || cidList.contains(cityid)) {
return true;
}
if (pid.equals("0") && cidList.contains(cityid)) {
return true;
}
return false;
}
/**
* 1 每100条进行读库操作 2 保证传过来的数据内存中是没有的,都是需要进行查库操作的;
*
* @param
*/
public boolean batchProcess(HashMap<String, Set<Long>> unitIdsMap, HashMap<Integer, String> logData,
HashMap<Integer, String[]> infoData) {
boolean flag = true; //标记位;
boolean code;
Long unitId;
for (String user : unitIdsMap.keySet()) {
Set<Long> unitIds = unitIdsMap.get(user);
Map<Long, UnitSetting> TbFeedInterestPoMap = unitSettingDao.getByUnitids(Long.parseLong(user), unitIds);
try { //sleep 1 mills,缓解数据库压力
Thread.sleep(1);
} catch (InterruptedException e) {
e.printStackTrace();
}
for (Long unit : TbFeedInterestPoMap.keySet()) {
if (!unitPidCidMem.containsKey(unit)) {
String[] pidCid = new String[2];
pidCid[0] = TbFeedInterestPoMap.get(unit).getPid();
pidCid[1] = TbFeedInterestPoMap.get(unit).getCid();
unitPidCidMem.put(unit, pidCid);
}
}
}
for (Integer lineNumber : infoData.keySet()) {
String[] infoStr = infoData.get(lineNumber);
unitId = Long.parseLong(infoStr[2]);
if(unitPidCidMem.containsKey(unitId)) {
String[] pidCid = unitPidCidMem.get(unitId);
code = compareId(infoStr[0], infoStr[1], pidCid[0], pidCid[1]);
if (!code) {
Log.infoLog( "lineNumber:"+ lineNumber + "error!" +" :" + logData.get(lineNumber)); //误报
flag = false;
}
}else {
Log.infoLog("There is bug in code : com.baidu.fengchao.sirius.scripts.cases.LogMonitoring");
}
}
return flag;
}
}
3 小结:
数据:145M
方法1:运行时间1.4h
方法2:运行时间30min
共同点:两种方法在比较时都使用了缓存。不同点:方法1在每次缓存未命中时都立即单独查询一次数据库,会产生大量数据库访问耗时,有一定的局限性;方法2则依次读取日志,当缓存未命中累计达到100条后再统一查库并更新缓存,从而减少了数据库访问次数。