做日志文件入库的时候,有时会遇到比较大的文件,甚至会超过1G文件入库,这时用传统的IO已经无法解决,这时会用NIO 或者是网上一些开源的框架。在日志入库时,几百万条记录入库时,就需要批量插入。
1.ResvolingLogFile.java
public boolean moreLogFile(String fileAllPath,String logFormat,
String key,String logType) throws Exception {
int bufSize = 100,num =0;;
int count=0;
boolean bo =false;
File file = new File(fileAllPath);
FileChannel fcl = new RandomAccessFile(file, "r").getChannel();
ByteBuffer rBuffer = ByteBuffer.allocate(bufSize);
DBConnectionPool dbpool = new DBConnectionPool(); //创建连接池
dbpool.createPool(); //清空连接池
Connection conn = dbpool.getConnection(); //根据连接池的初始连接量创建连接
conn.setAutoCommit(false);
PreparedStatement ppsm = null;
ResvolingLogFile rf = new ResvolingLogFile();
Map map = rf.pinjieSql(logFormat);
String sqlkey = "",sqlvalue="";
Iterator it = map.entrySet().iterator();
while (it.hasNext()){
Map.Entry entry = (Map.Entry)it.next();
sqlkey = (String) entry.getKey();
sqlvalue = (String) entry.getValue();
}
String zhengzeAll = ""; //从配置文件中取出log文件验证的正则表达式
String [] logkey = sqlkey.split(",");
for(int s=0;s<logkey.length;s++){ //将正则表达式连接起来,并分组
zhengzeAll = zhengzeAll + "(" + rb.getString(logkey[s]) +")";
}
ppsm = conn.prepareStatement(sqlvalue);
String enterStr ="\n";
String line="",impline="";
int n=0;
byte [] bt = new byte [bufSize];
StringBuffer strBuf = new StringBuffer("");
try{
//循环取出文件的每一行的内容
while(fcl.read(rBuffer)!=-1 ){
int rSize = rBuffer.position();
rBuffer.rewind();
rBuffer.get(bt);
rBuffer.clear();
String tempString = new String(bt,0,rSize);
int fromIndex=0;
int endIndex=0;
while((endIndex=tempString.indexOf(enterStr,fromIndex))!=-1){
line = tempString.substring(fromIndex, endIndex);
line = new String(strBuf.toString()+line);
strBuf.delete(0,strBuf.length());
fromIndex = endIndex + 1;
System.out.println("===line==="+line);
Pattern pat = Pattern.compile(zhengzeAll.trim());
Matcher mat = pat.matcher(line.trim()+" ");
count++;
//取出每一行的内容与正则表达式进行匹配
if(!mat.matches()){
continue; //如果不匹配,则跳出这次循环,继续下一次循环
}
//如果循环匹配,则根据分组将根据正则表达式截取出相应的字符串
for(int f=0;f<mat.groupCount();f++){
if(mat.group(f+1).indexOf("\"")>-1)
{
ppsm.setString(f+1, mat.group(f+1).replaceAll("\"", ""));
}else{
ppsm.setString(f+1, mat.group(f+1));
}
}
ppsm.setString(logkey.length+1, key);
ppsm.setString(logkey.length+2, logType);
ppsm.addBatch();
num++;
n++;
//批量插入,每次插入2000行
if(num==2000){
ppsm.executeBatch(); //批量将2000条记录插入数据库
conn.commit(); //插入后commit
dbpool.returnConnection(conn); //插入完成后释放连接
ppsm.clearBatch(); //插入完成后情况Batch
num = 0;
}
}if(rSize>tempString.length()){
strBuf.append(tempString.subSequence(fromIndex, tempString.length()));
}else{
strBuf.append(tempString.subSequence(fromIndex, rSize));
}
}
if(n%2000!=0){
ppsm.executeBatch(); //最后的记录不足2000 行时,提交插入
conn.commit(); //插入后commit
dbpool.returnConnection(conn); //插入完成后释放连接
ppsm.clearBatch(); //插入完成后情况Batch
bo = true;
}
}catch(Exception ex){
ex.printStackTrace();
}
dbpool.closeConnectionPool();
System.out.println("==========n======="+n);
System.out.println("==========count==="+count);
return bo;
}