自定义HBaseSink序列化类实现自定义rowkey及实时传输数据库数据至大数据平台
自定义HBaseSink序列化类实现自定义rowkey
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.flume.sink.hbase;
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
import org.apache.flume.conf.ComponentConfiguration;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Row;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* An {@link HbaseEventSerializer} which parses columns based on a supplied
* regular expression and column name list.
* <p>
* Note that if the regular expression does not return the correct number of
* groups for a particular event, or it does not correctly match an event, the
* event is silently dropped.
* <p>
* Row keys for each event consist of a timestamp concatenated with an
* identifier which enforces uniqueness of keys across flume agents.
* <p>
* See static constant variables for configuration options.
*/
public class RegexCREventSerializer implements HbaseEventSerializer {
// Config vars: configuration property names and their default values.
/** Config key: regular expression used to parse groups from event data. */
public static final String REGEX_CONFIG = "regex";
/**
* A pattern with a single capture group matching any run of characters.
* Presumably the fallback when {@link #REGEX_CONFIG} is not set; the code
* that reads it is not visible here, so confirm against configure().
*/
public static final String REGEX_DEFAULT = "(.*)";
/** Config key: whether to ignore case when performing regex matches. */
public static final String IGNORE_CASE_CONFIG = "regexIgnoreCase";
/**
* Case-insensitive matching is off in this default. Presumably used when
* {@link #IGNORE_CASE_CONFIG} is not set; confirm against configure().
*/
public static final boolean IGNORE_CASE_DEFAULT = false;
/** Config key: comma separated list of column names to place matching groups in. */
public static final String COL_NAME_CONFIG = "colNames";
/**
* A single column name, "payload". Presumably the fallback when
* {@link #COL_NAME_CONFIG} is not set; confirm against configure().
*/
public static final String COLUMN_NAME_DEFAULT = "payload";
/** Config key: index of the row key in matched regex groups. */
public static final String ROW_KEY_INDEX_CONFIG = "rowKeyIndex";
/** Placeholder in colNames for row key. */
public static final String ROW_KEY_NAME = "ROW_KEY";
/** W