flume之asynchbase与java的结合

1.flume的sink为asynchbase的配置

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Names of the agent's components
monster.sources= AvroIn
monster.sinks = HbaseOut
monster.channels = monsterchannel


# Source definition (Avro)
monster.sources.AvroIn.type = avro
monster.sources.AvroIn.bind = 172.16.37.107
monster.sources.AvroIn.port = 42400


# Sink definition (asynchbase with a custom serializer)
monster.sinks.HbaseOut.type = asynchbase
monster.sinks.HbaseOut.table = monstor_mm7mt
monster.sinks.HbaseOut.columnFamily = cf1
monster.sinks.HbaseOut.batchSize = 10
monster.sinks.HbaseOut.serializer = com.caissa.chador_flume.AsyncHbaseAllLogEventSerializer
monster.sinks.HbaseOut.serializer.columns = xunqi_number,protocol_type,message_type,submit_number,smsreq_rid,message_number,company_code,user_name,channel_value,billingusers_number,billing_type,aimphone_number,phone_number,aim_phone,appcode,is_status,messagevalid_time,message_sendtime,mobilevalide_number,valid_type,expenses,link_id,tp_pid,tp_udhi,message_format,message_code,mobiledeal_number,moblie_result,titile_length,mmcresouce_id,mmc_titile




# Channel definition --- file channel (disabled)
#monster.channels.monsterchannel.type = file
#monster.channels.monsterchannel.checkpointDir = /data/dataeckPoint/monster
#monster.channels.monsterchannel.backupCheckpointDir = /data/dataeckPoint/monster
#monster.channels.monsterchannel.keep-alive=10
#==========memorychannel================
monster.channels.monsterchannel.type = memory
monster.channels.monsterchannel.capacity=1000000
monster.channels.monsterchannel.keep-alive=10
# Property keys are case-sensitive: the key must be spelled "transactionCapacity",
# otherwise it is not picked up and the channel uses its default (100).
monster.channels.monsterchannel.transactionCapacity=1000
#==========kafkachannel=================
#monster.channels.monsterchannel.type = org.apache.flume.channel.kafka.KafkaChannel
#monster.channels.monsterchannel.brokerList = 192.100.4.3:9092,192.100.4.13:9092,192.100.4.15:9092
#monster.channels.monsterchannel.zookeeperConnect = 192.100.4.3:2181,192.100.4.13:2181,192.100.4.15:2181
#monster.channels.monsterchannel.topic = FLUME_TEST_TOPIC
#monster.channels.monsterchannel.kafka.consumer.timeout.ms = 100
#monster.channels.monsterchannel.parseAsFlumeEvent = false




# Wire source, channel and sink together
#monster.sources.oedipus_info.channels= mc1
monster.sources.AvroIn.channels= monsterchannel
monster.sinks.HbaseOut.channel = monsterchannel

2.com.caissa.chador_flume.AsyncHbaseAllLogEventSerializer的来源。

将项目打包成jar包并放到flume的lib目录下,flume即可调用到该类。

项目的结构为:

AsyncHbaseAllLogEventSerializer类的内容为:

package com.caissa.chador_flume;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.conf.ComponentConfiguration;
import org.apache.flume.sink.hbase.AsyncHbaseEventSerializer;
import org.hbase.async.AtomicIncrementRequest;
import org.hbase.async.PutRequest;

import java.net.InetAddress;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created by liuzhiling on 2018/7/26.
 */
public class AsyncHbaseAllLogEventSerializer implements AsyncHbaseEventSerializer {
	private byte[] table;
	private byte[] colFam;
	private Event currentEvent;
	private byte[][] columnNames;
	private final List<PutRequest> puts = new ArrayList<PutRequest>();
	private final List<AtomicIncrementRequest> incs = new ArrayList<AtomicIncrementRequest>();
	private final List<String> list = new ArrayList<String>();
	private byte[] currentRowKey;
	private final byte[] eventCountCol = "eventCount".getBytes();
	private String hostIP = "";
	private long consumerCount = 0;
	private int countSTC = 0;
//	private int colLength = 0;   //记录记录列的长度
	protected static final Log logger = LogFactory.getLog(AsyncHbaseAllLogEventSerializer.class);



	/**
	 * Initializes the serializer with the table and column family configured on the sink
	 * and looks up the address of the local host.
	 *
	 * <p>The passed values are kept as defaults; {@link #getActions()} overwrites them with
	 * the table and column family resolved for the log type of the current event.
	 *
	 * @param table name of the HBase table configured on the sink
	 * @param cf name of the column family configured on the sink
	 */
	public void initialize(byte[] table, byte[] cf) {
		// Keep the sink-level values so that getIncrements() does not build a request
		// from null fields before getActions() has resolved a log-type specific table.
		this.table = table;
		this.colFam = cf;
		try {
			// getLocalHost() is a static method; call it on the class instead of a null reference.
			InetAddress localHost = InetAddress.getLocalHost();
			this.hostIP = localHost.getHostAddress();
			System.out.println("本机的ip是 :"+this.hostIP);
		} catch (Exception e) {
			// Best effort: a failed lookup leaves hostIP empty and must not stop the sink.
			logger.warn("Unable to determine the local host address", e);
		}
	}

	/**
	 * Builds the HBase put requests for the current event.
	 *
	 * <p>Processing steps:
	 * <ol>
	 *   <li>Tokenize the log line and find which log type listed in the {@code logType}
	 *       property occurs in it; lines without a configured type produce no puts.</li>
	 *   <li>Resolve table, column family, field separator and row-key template from the
	 *       {@code {logtype}Table}, {@code {logtype}cf}, {@code {logtype}Split} and
	 *       {@code {logtype}Key} properties and build the row key. Template {@code 1} is
	 *       "fields + reversed timestamp + counter", template {@code 2} is "fields only".</li>
	 *   <li>Map the split message fields to columns according to {@code {logtype}PutValue}:
	 *       empty means one column per field in order; {@code 1} takes column names from
	 *       {@code {logtype}Property} and field indices from {@code {logtype}PropertyValue}
	 *       (both lists must correspond one to one); {@code 2} means the log type has
	 *       sub-types, each with its own Property/PropertyValue lists.</li>
	 *   <li>Add the {@code RECORD_DATE}, {@code log_date} and {@code log_time} columns.</li>
	 * </ol>
	 *
	 * <p>If processing fails, the exception is logged and the puts collected up to that point
	 * are returned.
	 *
	 * @return the reused list of put requests for the current event; possibly empty
	 */
	public List<PutRequest> getActions() {
		System.out.println("开始处理时间"+System.currentTimeMillis());
		String eventStr = new String(currentEvent.getBody());
		try {
			String[] cols = logTokenize(eventStr);
			puts.clear();
			// logTokenize returns null for lines that do not match the expected layout.
			if (cols == null) {
				return puts;
			}
			// 1. Check whether this line belongs to one of the log types we store.
			String logtype=PropertyUtil.getProperty("logType");
			ArrayList<String> logtypeList=StringToList(logtype);
			String mylogtype="";
			for(int logtypeNumer=0;logtypeNumer<logtypeList.size();logtypeNumer++){
				for (int colNumber=0;colNumber<cols.length;colNumber++){
					if(logtypeList.get(logtypeNumer).equals(cols[colNumber])){
						mylogtype=logtypeList.get(logtypeNumer);
						break;
					}
				}
				if(!"".equals(mylogtype)){
					break;
				}
			}
			// No configured log type occurs in this line: nothing to store.
			if ("".equals(mylogtype)) {
				return puts;
			}
			int logtypeNumber=Integer.parseInt(PropertyUtil.getProperty(mylogtype+"Number"));
			if (!mylogtype.equals(cols[logtypeNumber])){
				return puts;
			}
			// 2. Resolve the target table and build the row key.
			String req = cols[6];
			this.table = PropertyUtil.getProperty(mylogtype+"Table").getBytes();
			this.colFam = PropertyUtil.getProperty(mylogtype+"cf").getBytes();
			String splitStr=PropertyUtil.getProperty(mylogtype+"Split");
			String[] regArray = req.split(splitStr, -1);
			String propertyKey=PropertyUtil.getProperty(mylogtype+"Key");
			ArrayList<String> result=getTime(cols); // [0] = reversed timestamp, [1] = formatted time
			if(propertyKey!=null && !"".equals(propertyKey)){
				ArrayList<String> modelKey=StringToList(propertyKey);
				if(modelKey.get(0).equals("1")){
					// Template 1: fields + reversed timestamp + rolling counter (0..9999).
					this.countSTC += 1;
					this.countSTC = this.countSTC % 10000;
					String RowKeyStr=groupValue(modelKey,regArray,cols[logtypeNumber]);
					currentRowKey = (RowKeyStr+result.get(0)+"_"+Long.toString(this.countSTC)).getBytes();
				}else if(modelKey.get(0).equals("2")){
					// Template 2: combination of fields only.
					String RowKeyStr=groupValue(modelKey,regArray,cols[logtypeNumber]);
					// NOTE(review): this substring is a no-op, so the trailing "_" added by
					// groupValue stays in the key; kept as-is to preserve existing row keys.
					RowKeyStr=RowKeyStr.substring(0,RowKeyStr.length());
					currentRowKey=RowKeyStr.getBytes();
				}else{
					// Without this guard the row key of the previous event would be reused.
					throw new Exception("Unsupported row key template: " + modelKey.get(0));
				}
			}else{
				throw new Exception("未配置key的模板以及参数");
			}

			// 3. Map message fields to columns according to {logtype}PutValue.
			String putValue=PropertyUtil.getProperty(mylogtype+"PutValue");
			logger.info("eventStr: =========" + eventStr+"table:====== " + new String(table)+"======colFam======"+new String(colFam)+"=========" +
			            "putValue===="+putValue+" =======mylogtype===="+mylogtype+"====__currentRowKey: " + new String(currentRowKey)  + "========= __consumerCount:"+ this.consumerCount);
			if(putValue!=null && !"".equals(putValue)){
				if(putValue.equals("1")){
					String propertyStr=PropertyUtil.getProperty(mylogtype+"Property");
					ArrayList<String> property=StringToList(propertyStr);
					String propertyValueStr=PropertyUtil.getProperty(mylogtype+"PropertyValue");
					if(propertyValueStr!=null && !"".equals(propertyValueStr)){
						ArrayList<String> propertyValue=StringToList(propertyValueStr);
						for(int p=0;p<property.size();p++){
							// propertyValue[p] is the index of the message field stored in column property[p].
							int propertyNumber=Integer.parseInt(propertyValue.get(p));
							String rowValue=regArray[propertyNumber];
							if(rowValue!=null){
								rowValue=rowValue.trim();
							}
							String SpecialStr=PropertyUtil.getProperty(mylogtype+"Special");
							if(SpecialStr!=null){
								String [] SpecialPropertyStrs=SpecialStr.split("#");
								for(int s=0;s<SpecialPropertyStrs.length;s++){
									String SpecialPropertyStr=SpecialPropertyStrs[s];
									ArrayList<String> SpecialPropertyList=StringToList(SpecialPropertyStr);
									if(SpecialPropertyStr!=null && !"".equals(SpecialPropertyStr)){
										// The column label is property[p], the same name every put below uses.
										String secondValue=specialStr(SpecialPropertyList,rowValue,property.get(p));
										if(SpecialPropertyList.get(1).equals("substringtoOtherValue")){
											// Store the derived value in the extra column and the raw value in its own column.
											PutRequest putReq = new PutRequest(table, currentRowKey, colFam, SpecialPropertyList.get(4).getBytes(), secondValue.getBytes());
											puts.add(putReq);
											putReq = new PutRequest(table, currentRowKey, colFam, property.get(p).getBytes(), rowValue.getBytes());
											puts.add(putReq);
										}else{
											PutRequest putReq = new PutRequest(table, currentRowKey, colFam, property.get(p).getBytes(), secondValue.getBytes());
											puts.add(putReq);
										}
									}else{
										PutRequest putReq = new PutRequest(table, currentRowKey, colFam, property.get(p).getBytes(), rowValue.getBytes());
										puts.add(putReq);
									}
								}
							}else{
								PutRequest putReq = new PutRequest(table, currentRowKey, colFam, property.get(p).getBytes(), rowValue.getBytes());
								puts.add(putReq);
							}
						}
					}else{
						throw new Exception("未配置property和propertyValue的值");
					}
				}else if(putValue.equals("2")){
					String secondOprate=PropertyUtil.getProperty(mylogtype+"SecondOprate");
					if(secondOprate!=null && !"".equals(secondOprate)){
						ArrayList<String> sencondOprateList=StringToList(secondOprate);
						// The first entry is the index of the field that names the sub-type.
						String secondOprateType=regArray[Integer.parseInt(sencondOprateList.get(0))];
						String secondPropertyStr=null;
						String secondPropertyValueStr=null;
						if(mylogtype.equals("appproxy") || mylogtype.equals("bossproxy")){
							secondPropertyStr=PropertyUtil.getProperty(mylogtype+secondOprateType+"Property");
							secondPropertyValueStr=PropertyUtil.getProperty(mylogtype+secondOprateType+"PropertyValue");
						}else{
							secondPropertyStr=PropertyUtil.getProperty(secondOprateType+"Property");
							secondPropertyValueStr=PropertyUtil.getProperty(secondOprateType+"PropertyValue");
						}
						if(secondPropertyStr!=null && !"".equals(secondPropertyStr) && secondPropertyValueStr!=null && !"".equals(secondPropertyValueStr)){
							ArrayList<String> secondeProperty=StringToList(secondPropertyStr);
							ArrayList<String> secondePropertyValue=StringToList(secondPropertyValueStr);
							for(int p=0;p<secondeProperty.size();p++){
								int secondeNumber=Integer.parseInt(secondePropertyValue.get(p));
								String sencondValue=regArray[secondeNumber];
								if(sencondValue!=null){
									sencondValue=sencondValue.trim();
								}
								// NOTE(review): unlike the PutValue=1 branch, the column name here is
								// looked up by field index (secondeNumber) rather than by p — confirm
								// against the sub-type configuration before changing.
								String secondSpecialStr=PropertyUtil.getProperty(secondOprateType+"Special");
								if(secondSpecialStr!=null ){
									String[] secondSpecialPropertyStrs=secondSpecialStr.split("#");
									for(int s=0;s<secondSpecialPropertyStrs.length;s++){
										String secondSpecialPropertyStr=secondSpecialPropertyStrs[s];
										ArrayList<String> secondSpecialPropertyList=StringToList(secondSpecialPropertyStr);
										if(secondSpecialPropertyStr!=null && !"".equals(secondSpecialPropertyStr)){
											String secondValue=specialStr(secondSpecialPropertyList,sencondValue,secondeProperty.get(secondeNumber));
											if(secondSpecialPropertyList.get(1).equals("substringtoOtherValue")){
												PutRequest putReq = new PutRequest(table, currentRowKey, colFam, secondSpecialPropertyList.get(4).getBytes(), secondValue.getBytes());
												puts.add(putReq);
												putReq = new PutRequest(table, currentRowKey, colFam, secondeProperty.get(secondeNumber).getBytes(), sencondValue.getBytes());
												puts.add(putReq);
											}else {
												PutRequest putReq = new PutRequest(table, currentRowKey, colFam, secondeProperty.get(secondeNumber).getBytes(), secondValue.getBytes());
												puts.add(putReq);
											}
										}else{
											PutRequest putReq = new PutRequest(table, currentRowKey, colFam,secondeProperty.get(secondeNumber).getBytes(), sencondValue.getBytes());
											puts.add(putReq);
										}
									}
								}else {
									PutRequest putReq = new PutRequest(table, currentRowKey, colFam, secondeProperty.get(secondeNumber).getBytes(), sencondValue.getBytes());
									puts.add(putReq);
								}
							}
						}
					}else{
						throw  new Exception("未配置子业务操作类型");
					}
				}
			}else{
				// No PutValue configured: field i goes to column i of {logtype}Property.
				String propertyStr=PropertyUtil.getProperty(mylogtype+"Property");
				ArrayList<String> property=StringToList(propertyStr);
				for (int i = 0; i < property.size(); i++){
					String value = regArray[i];
					PutRequest putReq = new PutRequest(table, currentRowKey, colFam, property.get(i).getBytes(), value.getBytes());
					puts.add(putReq);
				}
			}

			// Add RECORD_DATE, log_date and log_time so the row can be searched by time.
			if (!"".equals(result.get(1)) && puts.size()>0) {
				PutRequest putReq = new PutRequest(table, currentRowKey, colFam, "RECORD_DATE".getBytes(), result.get(1).getBytes());
				puts.add(putReq);
				String[] dateInfo = result.get(1).split(" ");
				if (dateInfo.length == 2) {
					PutRequest putReq_date = new PutRequest(table, currentRowKey, colFam, "log_date".getBytes(), dateInfo[0].getBytes());
					puts.add(putReq_date);
					PutRequest putReq_time = new PutRequest(table, currentRowKey, colFam, "log_time".getBytes(), dateInfo[1].getBytes());
					puts.add(putReq_time);
				}
			}
			// 4. Bookkeeping: count processed events, wrapping after 100000.
			this.consumerCount = this.consumerCount + 1;
			if(this.consumerCount > 100000){
				this.consumerCount = 1;
			}
			System.out.println("结束处理时间"+System.currentTimeMillis());
			return puts;
		}catch (Exception e){
			// Surface failures with their stack trace instead of hiding them at debug level.
			logger.error("Failed to serialize event: " + eventStr, e);
			return puts;
		}
	}

	/**
	 * Builds the increment request for the event counter: column {@code eventCount}
	 * of row {@code totalEvents} in the current table and column family.
	 *
	 * @return the reused list holding that single increment request
	 */
	public List<AtomicIncrementRequest> getIncrements() {
		incs.clear();
		final byte[] counterRow = "totalEvents".getBytes();
		final AtomicIncrementRequest countEvent =
				new AtomicIncrementRequest(table, counterRow, colFam, eventCountCol);
		incs.add(countEvent);
		return incs;
	}

	/**
	 * Drops the references held by this serializer (current event, row key, column names,
	 * column family and table) by setting them to {@code null}.
	 */
	public void cleanUp() {
		currentEvent = null;
		currentRowKey = null;
		columnNames = null;
		colFam = null;
		table = null;
	}

	/**
	 * Concatenates selected message fields into a row-key prefix.
	 *
	 * <p>Element 0 of {@code modelKey} is the template type and is skipped here; the
	 * remaining elements are indices into {@code regArray}. Each selected field that is
	 * not empty is trimmed, passed through every rule of the {@code {colStr}Special}
	 * property whose first element equals the raw field value, and appended followed
	 * by {@code "_"}.
	 *
	 * @param modelKey key template: template type followed by field indices
	 * @param regArray the split message fields
	 * @param colStr log type name used to look up the {@code Special} property
	 * @return the concatenated fields, each followed by {@code "_"}; empty if none qualified
	 * @throws Exception if an index is not numeric or out of range, or a special rule fails
	 */
	public String groupValue(ArrayList<String> modelKey,String[] regArray,String colStr) throws Exception {
		StringBuilder rowKey = new StringBuilder();
		for (int m = 1; m < modelKey.size(); m++) {
			int keyNumber = Integer.parseInt(modelKey.get(m));
			String rawValue = regArray[keyNumber];
			if (rawValue == null || "".equals(rawValue)) {
				continue;
			}
			String strValue = rawValue.trim();
			String specialConfig = PropertyUtil.getProperty(colStr+"Special");
			if (specialConfig != null) {
				// Rules are separated by '#'. Iterate over the rules themselves; bounding the
				// loop by the character count of the property string overran the array.
				String[] specialRules = specialConfig.split("#");
				for (int s = 0; s < specialRules.length; s++) {
					String rule = specialRules[s];
					List<String> ruleParts = StringToList(rule);
					if (rule != null && !"".equals(rule) && ruleParts.get(0).equals(rawValue)) {
						strValue = specialStr(ruleParts, strValue, rawValue);
					}
				}
			}
			rowKey.append(strValue).append("_");
		}
		return rowKey.toString();
	}

	/**
	 * Reads the serializer configuration: the comma-separated {@code columns} value is
	 * split and stored as column names. A missing or empty value leaves the column
	 * names untouched.
	 *
	 * @param context the serializer's configuration context
	 */
	public void configure(Context context) {
		String cols = context.getString("columns");
		// A missing key yields null; wrapping it in new String(...) would throw.
		if (cols == null || "".equals(cols)) {
			return;
		}
		String[] names = cols.split(",");
		columnNames = new byte[names.length][];
		for (int i = 0; i < names.length; i++) {
			columnNames[i] = names[i].getBytes();
			System.out.println("columnNames: " + names[i]);
		}
	}


	/**
	 * Derives the record time of a log line from its first three tokens.
	 *
	 * <p>The tokens are read as month name ({@code cols[0]}), day of month ({@code cols[1]})
	 * and {@code HH:mm:ss} time ({@code cols[2]}); the year is taken from the system clock.
	 * NOTE(review): a line written in late December but processed in January would be
	 * dated with the new year — confirm whether that matters for the row keys.
	 *
	 * @param cols tokens of the log line
	 * @return a two-element list: {@code Long.MAX_VALUE - epochMillis} as a string, then the
	 *         time formatted as {@code yyyy-MM-dd HH:mm:ss}
	 * @throws Exception if the tokens cannot be parsed as a date
	 */
	public ArrayList<String> getTime(String[] cols)throws Exception{
		// Example of the assembled text: "11 Nov 2015 00:02:00".
		int year = Calendar.getInstance().get(Calendar.YEAR);
		String rawTimestamp = cols[1] + " " + cols[0] + " " + String.valueOf(year) + " " + cols[2];

		SimpleDateFormat formatter = new SimpleDateFormat("dd MMM yyyy HH:mm:ss", Locale.US);
		Date recordDate = formatter.parse(rawTimestamp);
		formatter.applyPattern("yyyy-MM-dd HH:mm:ss");

		ArrayList<String> timeParts = new ArrayList<String>();
		// Reversed timestamp: later records get smaller values.
		timeParts.add(Long.toString(Long.MAX_VALUE - recordDate.getTime()));
		timeParts.add(formatter.format(recordDate));
		return timeParts;
	}

	/**
	 * Splits a comma-separated string into a list.
	 *
	 * @param str the text to split; may be {@code null} or empty
	 * @return the parts in order, or an empty list for {@code null} or empty input
	 */
	public ArrayList<String> StringToList(String str){
		ArrayList<String> parts = new ArrayList<String>();
		if (str == null || "".equals(str)) {
			return parts;
		}
		for (String part : str.split(",")) {
			parts.add(part);
		}
		return parts;
	}

	/**
	 * Applies a special-handling rule to a field value.
	 *
	 * <p>Element 1 of the rule selects the operation. {@code subString} and
	 * {@code substringtoOtherValue} cut the value from index element 2 to index element 3
	 * (the word {@code length} means the end of the value). {@code date} reformats a
	 * {@code yyyyMMddHHmmss} value as {@code yyyy-MM-dd HH:mm:ss}. Any other operation
	 * leaves the value unchanged.
	 *
	 * @param specialResult the rule, split into its parts
	 * @param value the field value to transform
	 * @param cloumn column label, used only in the log message
	 * @return the transformed value
	 * @throws Exception if the rule is malformed or the value cannot be cut or parsed
	 */
	public String specialStr(List <String> specialResult,String value,String cloumn) throws Exception {
		final String operation = specialResult.get(1);
		final boolean cutsSubstring =
				operation.equals("subString") || operation.equals("substringtoOtherValue");
		if (cutsSubstring) {
			int from = Integer.parseInt(specialResult.get(2));
			String endSpec = specialResult.get(3);
			int to = endSpec.equals("length") ? value.length() : Integer.parseInt(endSpec);
			value = value.substring(from, to);
		} else if (operation.equals("date")) {
			Date parsed = new SimpleDateFormat("yyyyMMddHHmmss").parse(value);
			value = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(parsed);
		}
		logger.info(cloumn+"======特殊字段处理之后的结果======"+value);
		return value;
	}

	/**
	 * Layout of an oedipus_info.log line, compiled once because a {@link Pattern} is
	 * immutable and can be reused for every event. Sample line:
	 * Jul 21 17:25:44 hadoop03 oedipus[6215]: #source_name:erp_cruise|total_time:0.131|departport_name:|ship_name:|cruise_code:|ship_id:|use_all:1|close_date:|visa_date:|continent:|is_sellout:|travel_days:|sale_status:|destination:|company_id:|district_catalog_id:|company_name:|operator_code:|country_name:|lowest_price:|departure_type:|province:|trip_date:|line_code:|tags:|departure_name:|channel_id_match:|district_item_id:|departport_id:|line_id:|destination_name:|departure:|season_id:|product_type:|keyword:亚洲|country:|cruise_id:|line_name:|channel_id:dd95a34d2e1348ca9ed69982d7d44026|visa_end_date:|continent_name:|all_trip_date:|province_name:|travel_type:|use_mem:True|use_statics:True|use_sort_mul_value:1|querylogic_value:|uuid:|pt:|querylogic_term:|ordertype:[]|channel_match:False|use_synonym:True|use_or_querylogic:False|split_limit:20|use_or:False|limit:10|pagesize:10|use_core:False|use_correct:False|show_type:2|user:|use_split:False|page:1|total_found:15|
	 */
	private static final Pattern LOG_ENTRY_PATTERN = Pattern.compile(
			"^(\\S+)\\s+(\\d+)\\s+([\\w:]+)\\s+(\\S+)\\s+(\\S+)\\[(\\d+)\\]\\S+\\s+([\\s\\S]*)");

	/**
	 * Splits a log line into its seven capture groups using a regular expression.
	 *
	 * <p>For the sample line the groups are, in order: {@code Jul}, {@code 21},
	 * {@code 17:25:44}, {@code hadoop03}, {@code oedipus}, {@code 6215} and the
	 * remainder of the line after the {@code ]:} separator.
	 *
	 * @param eventStr the raw log line
	 * @return the captured groups, or {@code null} if the line does not match the layout
	 */
	public String[] logTokenize(String eventStr) {
		Matcher matcher = LOG_ENTRY_PATTERN.matcher(eventStr);
		if (!matcher.matches()) {
			logger.debug("Bad event_log :" + eventStr);
			return null;
		}
		String[] columns = new String[matcher.groupCount()];
		for (int i = 0; i < columns.length; i++) {
			// Group 0 is the whole match, so capture groups start at index 1.
			columns[i] = matcher.group(i + 1);
		}
		return columns;
	}

	/**
	 * Stores the event that the next call to {@link #getActions()} will process.
	 *
	 * @param event the event to serialize
	 */
	public void setEvent(Event event) {
		currentEvent = event;
	}
	/**
	 * Intentionally empty: nothing is read from the component configuration.
	 *
	 * @param conf the component configuration (unused)
	 */
	public void configure(ComponentConfiguration conf) {

	}

	}

3、flume调用本地代码调试的配置

1)服务器上flume的flume-ng中配置JAVA_OPTS修改为

2)本地idea配置

其中host为部署flume的机器,port为上面JAVA_OPTS中address指定的端口。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值