1. flume的sink为asynchbase的配置
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Declare the components of the agent named "monster"
monster.sources= AvroIn
monster.sinks = HbaseOut
monster.channels = monsterchannel
# Source definition
monster.sources.AvroIn.type = avro
monster.sources.AvroIn.bind = 172.16.37.107
monster.sources.AvroIn.port = 42400
# Sink definition
monster.sinks.HbaseOut.type = asynchbase
monster.sinks.HbaseOut.table = monstor_mm7mt
monster.sinks.HbaseOut.columnFamily = cf1
monster.sinks.HbaseOut.batchSize = 10
monster.sinks.HbaseOut.serializer = com.caissa.chador_flume.AsyncHbaseAllLogEventSerializer
monster.sinks.HbaseOut.serializer.columns = xunqi_number,protocol_type,message_type,submit_number,smsreq_rid,message_number,company_code,user_name,channel_value,billingusers_number,billing_type,aimphone_number,phone_number,aim_phone,appcode,is_status,messagevalid_time,message_sendtime,mobilevalide_number,valid_type,expenses,link_id,tp_pid,tp_udhi,message_format,message_code,mobiledeal_number,moblie_result,titile_length,mmcresouce_id,mmc_titile
# Channel definition --- file channel (disabled)
#monster.channels.monsterchannel.type = file
#monster.channels.monsterchannel.checkpointDir = /data/dataeckPoint/monster
#monster.channels.monsterchannel.backupCheckpointDir = /data/dataeckPoint/monster
#monster.channels.monsterchannel.keep-alive=10
#==========memorychannel================
monster.channels.monsterchannel.type = memory
monster.channels.monsterchannel.capacity=1000000
monster.channels.monsterchannel.keep-alive=10
# Property names are case-sensitive: the memory channel reads "transactionCapacity".
monster.channels.monsterchannel.transactionCapacity=1000
#==========kafkachannel=================
#monster.channels.monsterchannel.type = org.apache.flume.channel.kafka.KafkaChannel
#monster.channels.monsterchannel.brokerList = 192.100.4.3:9092,192.100.4.13:9092,192.100.4.15:9092
#monster.channels.monsterchannel.zookeeperConnect = 192.100.4.3:2181,192.100.4.13:2181,192.100.4.15:2181
#monster.channels.monsterchannel.topic = FLUME_TEST_TOPIC
#monster.channels.monsterchannel.kafka.consumer.timeout.ms = 100
#monster.channels.monsterchannel.parseAsFlumeEvent = false
# Wire the source, channel and sink together
#monster.sources.oedipus_info.channels= mc1
monster.sources.AvroIn.channels= monsterchannel
monster.sinks.HbaseOut.channel = monsterchannel
2.com.caissa.chador_flume.AsyncHbaseAllLogEventSerializer的来源。
将项目打包成jar包并放到flume的lib目录下,flume即可加载并调用该类。
项目的结构为:
AsyncHbaseAllLogEventSerializer类的内容为:
package com.caissa.chador_flume;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.conf.ComponentConfiguration;
import org.apache.flume.sink.hbase.AsyncHbaseEventSerializer;
import org.hbase.async.AtomicIncrementRequest;
import org.hbase.async.PutRequest;
import java.net.InetAddress;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by liuzhiling on 2018/7/26.
*/
public class AsyncHbaseAllLogEventSerializer implements AsyncHbaseEventSerializer {
private byte[] table;
private byte[] colFam;
private Event currentEvent;
private byte[][] columnNames;
private final List<PutRequest> puts = new ArrayList<PutRequest>();
private final List<AtomicIncrementRequest> incs = new ArrayList<AtomicIncrementRequest>();
private final List<String> list = new ArrayList<String>();
private byte[] currentRowKey;
private final byte[] eventCountCol = "eventCount".getBytes();
private String hostIP = "";
private long consumerCount = 0;
private int countSTC = 0;
// private int colLength = 0; //记录记录列的长度
protected static final Log logger = LogFactory.getLog(AsyncHbaseAllLogEventSerializer.class);
/**
 * Stores the table and column family handed over by the sink and records the
 * address of the local host.
 *
 * <p>The table and column family are kept so that {@link #getIncrements()} has
 * usable values even before {@link #getActions()} has replaced them with the
 * per-log-type values it reads from {@code PropertyUtil}.
 *
 * @param table name of the HBase table configured on the sink
 * @param cf    name of the column family configured on the sink
 */
public void initialize(byte[] table, byte[] cf) {
    this.table = table;
    this.colFam = cf;
    try {
        // getLocalHost() is static, so call it on the class instead of through a null reference.
        InetAddress ia = InetAddress.getLocalHost();
        this.hostIP = ia.getHostAddress();
        System.out.println("本机的ip是 :"+this.hostIP);
    } catch (Exception e) {
        // Best effort: the host address is informational only, so initialization continues
        // and hostIP keeps its previous value.
        logger.warn("Unable to resolve the local host address", e);
    }
}
/**
 * Converts the current event into the HBase puts that store it.
 *
 * <p>Processing steps:
 * <ol>
 * <li>Tokenize the line with {@link #logTokenize(String)} and pick the first entry of the
 *     {@code logType} property that equals one of the tokens. Lines that cannot be tokenized
 *     or that match no configured log type yield an empty list.</li>
 * <li>Load table, column family, field separator and row-key template for that log type
 *     from {@code PropertyUtil} and build the row key.</li>
 * <li>Emit the column puts according to the {@code <logtype>PutValue} mode.</li>
 * <li>When at least one put was produced, add the {@code RECORD_DATE}, {@code log_date} and
 *     {@code log_time} columns derived from the log timestamp.</li>
 * </ol>
 *
 * <p>If an exception occurs it is logged and the puts collected up to that point are returned.
 *
 * @return the puts for the current event; the same list instance is reused between calls
 */
public List<PutRequest> getActions() {
    System.out.println("开始处理时间"+System.currentTimeMillis());
    String eventStr = new String(currentEvent.getBody());
    try {
        String[] cols = logTokenize(eventStr);
        puts.clear();
        if (cols == null) {
            // The line does not have the expected header layout; there is nothing to store.
            return puts;
        }
        // 1. Check whether this line belongs to one of the log types we store.
        String logtype=PropertyUtil.getProperty("logType");
        ArrayList<String> logtypeList=StringToList(logtype);
        String mylogtype="";
        for(int logtypeNumer=0;logtypeNumer<logtypeList.size();logtypeNumer++){
            for (int colNumber=0;colNumber<cols.length;colNumber++){
                if(logtypeList.get(logtypeNumer).equals(cols[colNumber])){
                    mylogtype=logtypeList.get(logtypeNumer);
                    break;
                }
            }
            if(!"".equals(mylogtype)){
                break;
            }
        }
        if ("".equals(mylogtype)) {
            // No configured log type appears in this line; skip it without raising an exception.
            return puts;
        }
        int logtypeNumber=Integer.parseInt(PropertyUtil.getProperty(mylogtype+"Number"));
        if (!mylogtype.equals(cols[logtypeNumber])){
            return puts;
        }
        // 2. Build the row key.
        String req = cols[6];
        this.table = PropertyUtil.getProperty(mylogtype+"Table").getBytes();
        this.colFam = PropertyUtil.getProperty(mylogtype+"cf").getBytes();
        String splitStr=PropertyUtil.getProperty(mylogtype+"Split");
        String[] regArray = req.split(splitStr, -1);
        String propertyKey=PropertyUtil.getProperty(mylogtype+"Key");
        ArrayList<String> result=getTime(cols); // [0] reversed timestamp, [1] formatted log time
        if(propertyKey!=null && !"".equals(propertyKey)){
            ArrayList<String> modelKey=StringToList(propertyKey);
            /*
             * The first element selects the key template:
             * "1": grouped field values + reversed timestamp + "_" + rolling counter
             * "2": grouped field values only
             */
            if(modelKey.get(0).equals("1")){
                this.countSTC += 1;
                this.countSTC = this.countSTC % 10000;
                String RowKeyStr=groupValue(modelKey,regArray,cols[logtypeNumber]);
                currentRowKey = (RowKeyStr+result.get(0)+"_"+Long.toString(this.countSTC)).getBytes();
            }else if(modelKey.get(0).equals("2")){
                // NOTE(review): the original applied substring(0, length()), which is a no-op, so
                // the trailing "_" appended by groupValue stays in the key. Confirm whether it was
                // meant to be stripped before changing this; existing row keys depend on it.
                String RowKeyStr=groupValue(modelKey,regArray,cols[logtypeNumber]);
                currentRowKey=RowKeyStr.getBytes();
            }else{
                // An unknown template must not fall through and reuse the previous event's row key.
                throw new Exception("未配置key的模板以及参数");
            }
        }else{
            throw new Exception("未配置key的模板以及参数");
        }
        /*
         * 3. Assemble the columns to store. The rule is selected by the putValue property.
         * Empty putValue: no special handling, the configured columns are filled one-to-one
         *   from the split fields.
         * putValue 1: fields are selected by position and may need special handling.
         *   Requires {logtype}Property and {logtype}PropertyValue, which must correspond
         *   one-to-one.
         * putValue 2: the log type is divided into sub-types, each with its own column list.
         *   Requires {secondlogtype}Property and {secondlogtype}PropertyValue, which must
         *   correspond one-to-one.
         */
        String putValue=PropertyUtil.getProperty(mylogtype+"PutValue");
        logger.info("eventStr: =========" + eventStr+"table:====== " + new String(table)+"======colFam======"+new String(colFam)+"=========" +
                "putValue===="+putValue+" =======mylogtype===="+mylogtype+"====__currentRowKey: " + new String(currentRowKey) + "========= __consumerCount:"+ this.consumerCount);
        if(putValue!=null && !"".equals(putValue)){
            if(putValue.equals("1")){
                String propertyStr=PropertyUtil.getProperty(mylogtype+"Property");
                ArrayList<String> property=StringToList(propertyStr);
                String propertyValueStr=PropertyUtil.getProperty(mylogtype+"PropertyValue");
                if(propertyValueStr!=null && !"".equals(propertyValueStr)){
                    ArrayList<String> propertyValue=StringToList(propertyValueStr);
                    for(int p=0;p<property.size();p++){
                        int propertyNumber=Integer.parseInt(propertyValue.get(p));
                        String rowValue=regArray[propertyNumber];
                        // Strip surrounding whitespace from the field value.
                        if(rowValue!=null){
                            rowValue=rowValue.trim();
                        }
                        String SpecialStr=PropertyUtil.getProperty(mylogtype+"Special");
                        if(SpecialStr!=null){
                            String [] SpecialPropertyStrs=SpecialStr.split("#");
                            for(int s=0;s<SpecialPropertyStrs.length;s++){
                                String SpecialPropertyStr=SpecialPropertyStrs[s];
                                ArrayList<String> SpecialPropertyList=StringToList(SpecialPropertyStr);
                                if(SpecialPropertyStr!=null && !"".equals(SpecialPropertyStr)){
                                    // The column name is property.get(p); the original indexed the
                                    // column list with the field position here.
                                    String secondValue=specialStr(SpecialPropertyList,rowValue,property.get(p));
                                    if(SpecialPropertyList.get(1).equals("substringtoOtherValue")){
                                        // Store the derived value in its own column and keep the raw one.
                                        addPut(SpecialPropertyList.get(4), secondValue);
                                        addPut(property.get(p), rowValue);
                                    }else{
                                        addPut(property.get(p), secondValue);
                                    }
                                }else{
                                    addPut(property.get(p), rowValue);
                                }
                            }
                        }else{
                            addPut(property.get(p), rowValue);
                        }
                    }
                }else{
                    throw new Exception("未配置property和propertyValue的值");
                }
            }else if(putValue.equals("2")){
                String secondOprate=PropertyUtil.getProperty(mylogtype+"SecondOprate");
                if(secondOprate!=null && !"".equals(secondOprate)){
                    ArrayList<String> sencondOprateList=StringToList(secondOprate);
                    // The sub-type name is read from the field whose position is configured first.
                    String secondOprateType=regArray[Integer.parseInt(sencondOprateList.get(0))];
                    String secondPropertyStr=null;
                    String secondPropertyValueStr=null;
                    if(mylogtype.equals("appproxy") || mylogtype.equals("bossproxy")){
                        secondPropertyStr=PropertyUtil.getProperty(mylogtype+secondOprateType+"Property");
                        secondPropertyValueStr=PropertyUtil.getProperty(mylogtype+secondOprateType+"PropertyValue");
                    }else{
                        secondPropertyStr=PropertyUtil.getProperty(secondOprateType+"Property");
                        secondPropertyValueStr=PropertyUtil.getProperty(secondOprateType+"PropertyValue");
                    }
                    if(secondPropertyStr!=null && !"".equals(secondPropertyStr) && secondPropertyValueStr!=null && !"".equals(secondPropertyValueStr)){
                        ArrayList<String> secondeProperty=StringToList(secondPropertyStr);
                        ArrayList<String> secondePropertyValue=StringToList(secondPropertyValueStr);
                        for(int p=0;p<secondeProperty.size();p++){
                            int secondeNumber=Integer.parseInt(secondePropertyValue.get(p));
                            String sencondValue=regArray[secondeNumber];
                            // Strip surrounding whitespace from the field value.
                            if(sencondValue!=null){
                                sencondValue=sencondValue.trim();
                            }
                            // NOTE(review): unlike the putValue=1 branch, this branch looks up the
                            // column name with the field position (secondeNumber) rather than p.
                            // Left unchanged because it decides which columns are written; confirm
                            // against the property files.
                            String secondSpecialStr=PropertyUtil.getProperty(secondOprateType+"Special");
                            if(secondSpecialStr!=null ){
                                String[] secondSpecialPropertyStrs=secondSpecialStr.split("#");
                                for(int s=0;s<secondSpecialPropertyStrs.length;s++){
                                    String secondSpecialPropertyStr=secondSpecialPropertyStrs[s];
                                    ArrayList<String> secondSpecialPropertyList=StringToList(secondSpecialPropertyStr);
                                    if(secondSpecialPropertyStr!=null && !"".equals(secondSpecialPropertyStr)){
                                        String secondValue=specialStr(secondSpecialPropertyList,sencondValue,secondeProperty.get(secondeNumber));
                                        if(secondSpecialPropertyList.get(1).equals("substringtoOtherValue")){
                                            addPut(secondSpecialPropertyList.get(4), secondValue);
                                            addPut(secondeProperty.get(secondeNumber), sencondValue);
                                        }else {
                                            addPut(secondeProperty.get(secondeNumber), secondValue);
                                        }
                                    }else{
                                        addPut(secondeProperty.get(secondeNumber), sencondValue);
                                    }
                                }
                            }else {
                                addPut(secondeProperty.get(secondeNumber), sencondValue);
                            }
                        }
                    }
                }else{
                    throw new Exception("未配置子业务操作类型");
                }
            }
        }else{
            String propertyStr=PropertyUtil.getProperty(mylogtype+"Property");
            ArrayList<String> property=StringToList(propertyStr);
            for (int i = 0; i < property.size(); i++){
                String value = regArray[i];
                addPut(property.get(i), value);
            }
        }
        // Add the timestamp columns used for searching.
        if (!"".equals(result.get(1)) && puts.size()>0) {
            addPut("RECORD_DATE", result.get(1));
            String[] dateInfo = result.get(1).split(" ");
            if (dateInfo.length == 2) {
                addPut("log_date", dateInfo[0]);
                addPut("log_time", dateInfo[1]);
            }
        }
        // 4. Bookkeeping: rolling count of processed events.
        this.consumerCount = this.consumerCount + 1;
        if(this.consumerCount > 100000){
            this.consumerCount = 1;
        }
        System.out.println("结束处理时间"+System.currentTimeMillis());
        return puts;
    }catch (Exception e){
        // Keep the best-effort contract (return what was collected) but make the failure visible.
        logger.warn("Failed to serialize event: " + eventStr, e);
        return puts;
    }
}

/** Queues a put of {@code value} into column {@code qualifier} of the current row. */
private void addPut(String qualifier, String value) {
    puts.add(new PutRequest(table, currentRowKey, colFam, qualifier.getBytes(), value.getBytes()));
}
/**
 * Builds the counter increment that accompanies each event: one increment of the
 * {@code eventCount} column in the {@code totalEvents} row of the current table
 * and column family.
 *
 * @return the reused list holding exactly one increment request
 */
public List<AtomicIncrementRequest> getIncrements() {
    incs.clear();
    byte[] counterRow = "totalEvents".getBytes();
    AtomicIncrementRequest eventCounter =
            new AtomicIncrementRequest(table, counterRow, colFam, eventCountCol);
    incs.add(eventCounter);
    return incs;
}
/**
 * Drops the references held for the current table, column family, event,
 * column names and row key.
 */
public void cleanUp() {
    currentRowKey = null;
    currentEvent = null;
    columnNames = null;
    colFam = null;
    table = null;
}
/**
 * Joins the fields selected by the key template into a row-key prefix.
 *
 * <p>Each non-empty selected field is trimmed, optionally transformed by the rules of the
 * {@code <colStr>Special} property, and appended followed by {@code "_"}; the result therefore
 * ends with an underscore whenever at least one field was used.
 *
 * @param modelKey key template; element 0 is the template type and is skipped, the remaining
 *                 elements are positions into {@code regArray}
 * @param regArray the split fields of the log message
 * @param colStr   the log type, used as prefix of the {@code Special} property name
 * @return the concatenated key prefix, empty when no selected field has a value
 * @throws Exception if a position is not a number or out of range, or a special rule fails
 */
public String groupValue(ArrayList<String> modelKey,String[] regArray,String colStr) throws Exception {
    StringBuilder rowKey = new StringBuilder();
    // The rule set does not depend on the field, so read and split it once.
    String specialConfig = PropertyUtil.getProperty(colStr+"Special");
    String[] specialRules = specialConfig == null ? new String[0] : specialConfig.split("#");
    for (int m = 1; m < modelKey.size(); m++) {
        int keyNumber = Integer.parseInt(modelKey.get(m));
        String rawValue = regArray[keyNumber];
        if (rawValue == null || "".equals(rawValue)) {
            continue;
        }
        String strValue = rawValue.trim();
        // Iterate over the rules themselves; the original bounded this loop by the length of the
        // unsplit property string and ran past the end of the rule array.
        for (String rule : specialRules) {
            if ("".equals(rule)) {
                continue;
            }
            List<String> ruleParts = StringToList(rule);
            // NOTE(review): the rule's first element is compared with the field VALUE, as in the
            // original; confirm whether a field name or position was intended.
            if (!ruleParts.isEmpty() && ruleParts.get(0).equals(rawValue)) {
                strValue = specialStr(ruleParts, strValue, rawValue);
            }
        }
        rowKey.append(strValue).append("_");
    }
    return rowKey.toString();
}
/**
 * Reads the comma-separated {@code columns} setting from the serializer context and stores
 * the column names as byte arrays. A missing or empty setting leaves the column names
 * untouched.
 *
 * @param context configuration passed in by the sink
 */
public void configure(Context context) {
    String cols = context.getString("columns");
    // The setting is optional; without this check a missing key caused a NullPointerException.
    if (cols == null || "".equals(cols)) {
        return;
    }
    String[] names = cols.split(",");
    columnNames = new byte[names.length][];
    for (int i = 0; i < names.length; i++) {
        columnNames[i] = names[i].getBytes();
        System.out.println("columnNames: " + names[i]);
    }
}
/**
 * Derives the record time from the header tokens of a log line.
 *
 * <p>The header carries no year (for example {@code Nov 11 00:02:00}), so the current year is
 * assumed. If that puts the record more than one day in the future, the line is taken to be
 * from the previous year; this covers late-December lines processed after New Year.
 *
 * @param cols header tokens: {@code cols[0]} month abbreviation, {@code cols[1]} day of month,
 *             {@code cols[2]} time as {@code HH:mm:ss}
 * @return a two-element list: index 0 is {@code Long.MAX_VALUE} minus the record time in
 *         milliseconds, index 1 is the record time formatted as {@code yyyy-MM-dd HH:mm:ss}
 * @throws Exception if fewer than three tokens are supplied or the timestamp cannot be parsed
 */
public ArrayList<String> getTime(String[] cols)throws Exception{
    if (cols == null || cols.length < 3) {
        throw new IllegalArgumentException("expected at least 3 header tokens (month, day, time)");
    }
    Calendar now = Calendar.getInstance();
    // Assemble e.g. "11 Nov 2015 00:02:00" from the header tokens and the current year.
    String logTime = cols[1] + " " + cols[0] + " " + now.get(Calendar.YEAR) + " " + cols[2];
    SimpleDateFormat dateFormat = new SimpleDateFormat("dd MMM yyyy HH:mm:ss", Locale.US);
    Date recordDate = dateFormat.parse(logTime);

    // One day of tolerance absorbs clock skew between the log producer and this host.
    Calendar latestPlausible = (Calendar) now.clone();
    latestPlausible.add(Calendar.DAY_OF_MONTH, 1);
    if (recordDate.after(latestPlausible.getTime())) {
        Calendar previousYear = Calendar.getInstance();
        previousYear.setTime(recordDate);
        previousYear.add(Calendar.YEAR, -1);
        recordDate = previousYear.getTime();
    }

    dateFormat.applyPattern("yyyy-MM-dd HH:mm:ss");
    ArrayList<String> result = new ArrayList<String>();
    // Subtracting from Long.MAX_VALUE makes newer records sort first.
    result.add(Long.toString(Long.MAX_VALUE - recordDate.getTime()));
    result.add(dateFormat.format(recordDate));
    return result;
}
/**
 * Splits a comma-separated string into a list.
 *
 * @param str the text to split; may be {@code null} or empty
 * @return the pieces in order, or an empty list when {@code str} is {@code null} or empty
 */
public ArrayList<String> StringToList(String str){
    ArrayList<String> pieces = new ArrayList<String>();
    if (str == null || "".equals(str)) {
        return pieces;
    }
    for (String piece : str.split(",")) {
        pieces.add(piece);
    }
    return pieces;
}
/**
 * Applies one special-handling rule to a field value.
 *
 * <p>The rule's element 1 names the operation:
 * {@code subString} and {@code substringtoOtherValue} cut the value from the index in element 2
 * to the index in element 3 (or to the end when element 3 is {@code length});
 * {@code date} reformats a {@code yyyyMMddHHmmss} value as {@code yyyy-MM-dd HH:mm:ss}.
 * Any other operation leaves the value unchanged.
 *
 * @param specialResult the rule split into its elements
 * @param value         the field value to transform
 * @param cloumn        the column name, used only in the log message
 * @return the transformed value
 * @throws Exception if an index is not numeric or out of range, or the date cannot be parsed
 */
public String specialStr(List <String> specialResult,String value,String cloumn) throws Exception {
    String operation = specialResult.get(1);
    boolean cutsSubstring = operation.equals("subString") || operation.equals("substringtoOtherValue");
    if (cutsSubstring) {
        int from = Integer.parseInt(specialResult.get(2));
        String endSpec = specialResult.get(3);
        int to = endSpec.equals("length") ? value.length() : Integer.parseInt(endSpec);
        value = value.substring(from, to);
    } else if (operation.equals("date")) {
        Date parsed = new SimpleDateFormat("yyyyMMddHHmmss").parse(value);
        value = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(parsed);
    }
    logger.info(cloumn+"======特殊字段处理之后的结果======"+value);
    return value;
}
/**
 * Layout of one log line: month, day, time, host, program, {@code [pid]}, a run of
 * non-space characters (the colon in the sample below), then the message.
 * Compiled once because it is applied to every event.
 */
private static final Pattern LOG_ENTRY_PATTERN = Pattern.compile(
        "^(\\S+)\\s+(\\d+)\\s+([\\w:]+)\\s+(\\S+)\\s+(\\S+)\\[(\\d+)\\]\\S+\\s+([\\s\\S]*)");

/**
 * Splits a log line into its seven header/message tokens using a regular expression.
 *
 * <p>oedipus_info.log format:
 * Jul 21 17:25:44 hadoop03 oedipus[6215]: #source_name:erp_cruise|total_time:0.131|departport_name:|ship_name:|cruise_code:|ship_id:|use_all:1|close_date:|visa_date:|continent:|is_sellout:|travel_days:|sale_status:|destination:|company_id:|district_catalog_id:|company_name:|operator_code:|country_name:|lowest_price:|departure_type:|province:|trip_date:|line_code:|tags:|departure_name:|channel_id_match:|district_item_id:|departport_id:|line_id:|destination_name:|departure:|season_id:|product_type:|keyword:亚洲|country:|cruise_id:|line_name:|channel_id:dd95a34d2e1348ca9ed69982d7d44026|visa_end_date:|continent_name:|all_trip_date:|province_name:|travel_type:|use_mem:True|use_statics:True|use_sort_mul_value:1|querylogic_value:|uuid:|pt:|querylogic_term:|ordertype:[]|channel_match:False|use_synonym:True|use_or_querylogic:False|split_limit:20|use_or:False|limit:10|pagesize:10|use_core:False|use_correct:False|show_type:2|user:|use_split:False|page:1|total_found:15|
 *
 * @param eventStr the raw log line
 * @return the tokens in order (month, day, time, host, program, pid, message), or
 *         {@code null} when the line does not match the expected layout
 */
public String[] logTokenize(String eventStr) {
    Matcher matcher = LOG_ENTRY_PATTERN.matcher(eventStr);
    if (!matcher.matches()) {
        logger.debug("Bad event_log :" + eventStr);
        return null;
    }
    int groupCount = matcher.groupCount();
    String[] columns = new String[groupCount];
    for (int i = 0; i < groupCount; i++) {
        // Group 0 is the whole match, so capture groups start at 1.
        columns[i] = matcher.group(i + 1);
    }
    return columns;
}
/**
 * Remembers the event that the next call to {@link #getActions()} will serialize.
 *
 * @param event the event to process next
 */
public void setEvent(Event event) {
    currentEvent = event;
}
/**
 * Intentionally does nothing; this serializer takes no settings from a
 * {@code ComponentConfiguration}.
 *
 * @param conf ignored
 */
public void configure(ComponentConfiguration conf) {
    // no-op
}
}
3、flume调用本地代码调试的配置
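1)将服务器上flume的bin/flume-ng脚本中的JAVA_OPTS修改为开启远程调试的参数,例如:JAVA_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=8000"(address为调试端口,可按需调整)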
2)本地idea配置
host为flume部署的机器。port为上边JAVA_OPTS中的address