/**
 * Flink job entry point<br>
 */
public class FlinkKafkaParseJob {
    /**
     * Logger
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(FlinkKafkaParseJob.class);
    /**
     * Consumer Group ID
     */
    private static final String GROUP_ID = "bc713596-bfc8-4ff5-bd50-a5442ed8255";

    public void parseKafkaData() {
        String taskIds = FlinkReadProperties.getTaskIds();
        String zkServer = FlinkReadProperties.getRDRSZookeeperServer();
        Map<String, Object> kafkaInfoMap = new HashMap<>();
        try {
            kafkaInfoMap = getKafkaInfoMap(zkServer, taskIds, GROUP_ID);
        } catch (Exception e) {
            LOGGER.error("failed to read Kafka metadata from ZooKeeper, exiting", e);
            System.exit(1);
        }
        // Create the Kafka source
        String brokers = MapUtils.getString(kafkaInfoMap, "brokers");
        List<String> topics = (List<String>) MapUtils.getObject(kafkaInfoMap, "topics");
        String groupId = MapUtils.getString(kafkaInfoMap, "groupId");
        String schema = MapUtils.getString(kafkaInfoMap, "schema");
        FlinkKafkaConsumer011<Entry> consumer = KafkaSource.createSource(brokers, topics, groupId, schema);
        consumer.setStartFromGroupOffsets();
        // Set up the Flink execution environment and attach the consumer as a source
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Enabling checkpointing gives end-to-end exactly-once consumption: offsets are committed
        // only on successful checkpoints, so a failure in the sink will not commit the offset
        //env.enableCheckpointing(5000);
        DataStreamSource<Entry> kafkaSource = env.addSource(consumer).setParallelism(5);
        try {
            // Process the data with map/flatMap operators
            if (FlinkReadProperties.batchFlag()) {
                LOGGER.info("batch flink job run");
                SingleOutputStreamOperator<TransformationEntry> flatMapOutputStream = kafkaSource.flatMap(new EntryFlatMapFunction()).setParallelism(1);
                flatMapOutputStream.map(new EntryMapFunction()).setParallelism(20);
            } else {
                LOGGER.info("single flink job run");
                kafkaSource.map(new DataTransformation()).setParallelism(5);
            }
            env.execute("flink consume kafka data");
        } catch (Exception e) {
            LOGGER.error("flink job error: {}", ExceptionUtils.getFullStackTrace(e));
        }
    }
}
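KafkaSource.createSource is referenced above but not shown. The following is a minimal sketch of what such a factory could look like against the flink-connector-kafka-0.11 API; EntryDeserializationSchema is a hypothetical DeserializationSchema<Entry> for decoding each Kafka record according to the given schema name, not the project's actual class:

public class KafkaSource {
    public static FlinkKafkaConsumer011<Entry> createSource(String brokers, List<String> topics,
                                                            String groupId, String schema) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", brokers);
        props.setProperty("group.id", groupId);
        // EntryDeserializationSchema is a hypothetical DeserializationSchema<Entry>;
        // the real project decides how the binlog payload is decoded
        return new FlinkKafkaConsumer011<>(topics, new EntryDeserializationSchema(schema), props);
    }
}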
==========================================================================
/**
 * Flink binlog data processing function
 */
public class EntryFlatMapFunction implements FlatMapFunction<Entry, TransformationEntry> {
    /**
     * Logger (static so the non-serializable Logger is not captured when Flink serializes this function)
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(EntryFlatMapFunction.class);
    @Override
    public void flatMap(Entry entry, Collector<TransformationEntry> collector) throws Exception {
        // Convert one MySQL row change into a BinlogRowBean
        BinlogRowBean binlogRowBean = dealOneRow(entry.getBody().getRows().get(0), entry.getHeader().getEventType(), entry.getHeader().getTableName());
        // Physical table name
        String tableName = binlogRowBean.getTableName();
        // If this is a sharded table (the name matches Constant.TABLE_SUFFIX_PATTERN, presumably a
        // trailing shard suffix such as "t_order_0012"), strip everything after the last "_"
        // to recover the logical table name
        String logicTableName;
        if (tableName.matches(Constant.TABLE_SUFFIX_PATTERN)) {
            logicTableName = StringUtils.substringBeforeLast(tableName, "_");
        } else {
            logicTableName = tableName;
        }
        // Look up the data extraction rule defined for the logical table in the configuration file
        String tableDefineRule = FlinkReadProperties.getProperty(logicTableName);
        // No MySQL-to-HBase mapping rule is defined for this table; skip the row
        if (StringUtils.isBlank(tableDefineRule)) {
            LOGGER.error("MySQL table " + logicTableName + " has no mapping to an HBase table defined");
            return;
        }
        // From the configured rule, derive the rowKey and the business data to store in HBase
        TableFieldRelation tableFieldRelation = JSON.parseObject(tableDefineRule, TableFieldRelation.class);
        String rowKey = RowKeyUtil.generateRowKey(binlogRowBean, tableFieldRelation.getRowKey());
        Map<String, String> hBaseDataMap;
        if (binlogRowBean.getEventType() == EventType.DELETE) {
            hBaseDataMap = generateHBaseDataMap(tableFieldRelation, binlogRowBean.getBeforeColumns());
        } else {
            hBaseDataMap = generateHBaseDataMap(tableFieldRelation, binlogRowBean.getAfterColumns());
        }
        // LOGGER.debug("subscribed table: " + logicTableName + ", rowKey: " + rowKey);
        // Obtain the transformation for this table from the factory
        AbstractDataTransformation dataTransformation = DataTransformationFactory.getDataTransformation(logicTableName);
        // Build the TransformationEntry list
        List<TransformationEntry> transformationEntryList = dataTransformation.parseToTransformationEntry(logicTableName, rowKey, hBaseDataMap, binlogRowBean);
        if (CollectionUtils.isEmpty(transformationEntryList)) {
            return;
        }
        // Emit each entry downstream
        for (TransformationEntry transformationEntry : transformationEntryList) {
            collector.collect(transformationEntry);
        }
    }
    /**
     * Build the HBase column data map: key is the HBase column name, value is the column value
     * @param tableFieldRelation the configured table/field mapping rule
     * @param dbDataMap column values from the binlog row
     * @return HBase column name to value map
     */
    public Map<String, String> generateHBaseDataMap(TableFieldRelation tableFieldRelation, Map<String, Object> dbDataMap) {
        List<String> columnList = Arrays.asList(tableFieldRelation.getField().split(","));
        return generateHBaseDataMap(columnList, dbDataMap);
    }
    /**
     * Build the HBase column data map: key is the HBase column name, value is the column value
     * @param columnList the configured column mapping expressions
     * @param dbDataMap column values from the binlog row
     * @return HBase column name to value map
     */
    public Map<String, String> generateHBaseDataMap(List<String> columnList, Map<String, Object> dbDataMap) {
        Map<String, String> hbaseDataMap = new HashMap<>();
        for (String columnStr : columnList) {
            // HBase stores the column under an alias; parse the expression into a column mapping object
            ColumnAliasMappingBean columnAliasMappingBean = columnAlias(columnStr);
            // Write the HBase column name and value into the data map (missing values become "")
            String dbColumnName = columnAliasMappingBean.getDbColumnName();
            String value = dbDataMap.get(dbColumnName) == null ? "" : String.valueOf(dbDataMap.get(dbColumnName));
            hbaseDataMap.put(columnAliasMappingBean.getHbaseColumnName(), value);
        }
        return hbaseDataMap;
    }
    /**
     * Parse a field expression into a column-name mapping object holding dbColumnName and hbaseColumnName
     * @param fieldStr the field expression
     * @return the column alias mapping
     */
    private ColumnAliasMappingBean columnAlias(String fieldStr) {
        if (StringUtils.isBlank(fieldStr)) {
            return new ColumnAliasMappingBean("", "");
        }
        String[] splitNameArr = fieldStr.split(" as ");
        // "db as hbase" defines an alias; otherwise the column name maps to itself
        if (splitNameArr.length == 2) {
            return new ColumnAliasMappingBean(splitNameArr[0].trim(), splitNameArr[1].trim());
        }
        return new ColumnAliasMappingBean(splitNameArr[0].trim(), splitNameArr[0].trim());
    }
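    // Example: the expression "user_name as uname" maps DB column "user_name" to HBase
    // qualifier "uname", while a plain "age" (no " as ") maps to itself.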
    public static BinlogRowBean dealOneRow(Row row, EventType eventType, String tableName) {
        // Convert beforeColumns into a name -> value map
        List<Column> columns = row.getBeforeColumns();
        Map<String, Object> beforeColumns = dealColumns(columns);
        // Convert afterColumns into a name -> value map
        columns = row.getAfterColumns();
        Map<String, Object> afterColumns = dealColumns(columns);
        // Compare the before and after images; changed values are kept in changedColumns
        Map<String, Object> changedColumns = compareValue(beforeColumns, afterColumns, eventType);
        BinlogRowBean binlogRowBean = new BinlogRowBean();
        binlogRowBean.setTableName(tableName);
        binlogRowBean.setEventType(eventType);
        binlogRowBean.setBeforeColumns(beforeColumns);
        binlogRowBean.setAfterColumns(afterColumns);
        binlogRowBean.setChangedColumns(changedColumns);
        return binlogRowBean;
    }
    public static Map<String, Object> compareValue(Map<String, Object> beforeColumns, Map<String, Object> afterColumns, EventType eventType) {
        // Placeholder: a possible diff implementation is sketched after this class
        Map<String, Object> changedColumns = new HashMap<>();
        return changedColumns;
    }
    public static Map<String, Object> dealColumns(List<Column> columns) {
        // Index column values by upper-cased column name so later lookups are case-insensitive
        Map<String, Object> map = new HashMap<>();
        if (CollectionUtils.isEmpty(columns)) {
            return map;
        }
        for (Column column : columns) {
            map.put(column.getName().toUpperCase(), column.getValue());
        }
        return map;
    }
}
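The compareValue stub above returns an empty map. A minimal sketch of the diff it could compute, keeping the columns whose values changed between the before and after images; the UPDATE-only guard and the use of java.util.Objects.equals are assumptions, not the project's actual logic:

    public static Map<String, Object> compareValue(Map<String, Object> beforeColumns,
                                                   Map<String, Object> afterColumns,
                                                   EventType eventType) {
        Map<String, Object> changedColumns = new HashMap<>();
        // Assumption: only UPDATE events carry both a before and an after image worth diffing
        if (eventType != EventType.UPDATE) {
            return changedColumns;
        }
        for (Map.Entry<String, Object> entry : afterColumns.entrySet()) {
            // Keep a column when its after value differs from its before value
            if (!Objects.equals(entry.getValue(), beforeColumns.get(entry.getKey()))) {
                changedColumns.put(entry.getKey(), entry.getValue());
            }
        }
        return changedColumns;
    }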
======================================================================================
/**
 * HBase utility class: insert, delete, update, and query operations on HBase tables<br>
 */
public class HBaseUtil {
    private static final Logger LOGGER = LoggerFactory.getLogger(HBaseUtil.class);
    private static final String HBASE_ZOOKEEPER_QUORUM = "hbase.zookeeper.quorum";
    private static final String HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT = "hbase.zookeeper.property.clientPort";
    private static final String ZOOKEEPER_ZNODE_PARENT = "zookeeper.znode.parent";
    private static final String HADOOP_USER_NAME = "hadoop.user.name";
    private static Connection conn;
    private static Properties properties;

    static {
        properties = PropertyUtil.getInstance().getProperties();
        conn = initHBaseConnection(properties);
    }

    public static Connection getConn() {
        return conn;
    }

    /**
     * Initialize the HBase Connection
     * @param properties cluster connection properties
     * @return the Connection, or null if the cluster is unreachable
     */
    public static Connection initHBaseConnection(Properties properties) {
        Configuration config = HBaseUtil.initHBaseConfiguration(properties);
        User user = User.create(UserGroupInformation.createRemoteUser(properties.getProperty("hbase.client.user")));
        try {
            return ConnectionFactory.createConnection(config, user);
        } catch (IOException e) {
            LOGGER.error("failed to connect to HBase cluster: " + properties.getProperty(HBASE_ZOOKEEPER_QUORUM), e);
            return null;
        }
    }

    /**
     * Initialize the HBase Configuration
     * @param properties cluster connection properties
     * @return the populated Configuration
     */
    public static Configuration initHBaseConfiguration(Properties properties) {
        Configuration hconf = HBaseConfiguration.create();
        hconf.set(HConstants.ZOOKEEPER_QUORUM, properties.getProperty(HBASE_ZOOKEEPER_QUORUM));
        hconf.set(HConstants.ZOOKEEPER_CLIENT_PORT, properties.getProperty(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT));
        hconf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, properties.getProperty(ZOOKEEPER_ZNODE_PARENT));
        hconf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 120000);
        hconf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 180000);
        hconf.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 240000);
        hconf.set(HADOOP_USER_NAME, properties.getProperty("hbase.client.user"));
        return hconf;
    }

    /**
     * Put a single column value
     * @param tableName
     * @param rowKey
     * @param columnFamily
     * @param qualifier
     * @param data
     * @return true if the put succeeded
     */
    public static boolean putData(String tableName, String rowKey, String columnFamily, String qualifier, String data) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(qualifier), Bytes.toBytes(data));
            table.put(put);
            return true;
        } catch (IOException e) {
            LOGGER.error("failed to save data to HBase table " + tableName + ", rowKey: " + rowKey + ", data: " + data, e);
            return false;
        } finally {
            closeTable(table);
        }
    }

    /**
     * Save a data map into an HBase table
     * @param tableName
     * @param rowKey
     * @param columnFamily
     * @param dataMap
     */
    public static void putData(String tableName, String rowKey, String columnFamily, Map<String, String> dataMap) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Put put = getPut(rowKey, columnFamily, dataMap);
            table.put(put);
        } catch (IOException e) {
            LOGGER.error("failed to save data to HBase table " + tableName + ", rowKey: " + rowKey + ", data: " + JSON.toJSONString(dataMap), e);
        } finally {
            closeTable(table);
        }
    }

    /**
     * Locked put: the put is only applied if the current value of checkQualifier
     * equals checkValue (HBase checkAndPut, a row-level optimistic lock)
     * @param tableName
     * @param rowKey
     * @param columnFamily
     * @param checkQualifier
     * @param checkValue
     * @param dataMap
     * @return true if the put was applied
     */
    public static boolean putDataWithLock(String tableName, String rowKey, String columnFamily, String checkQualifier, String checkValue, Map<String, String> dataMap) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Put put = getPut(rowKey, columnFamily, dataMap);
            return table.checkAndPut(Bytes.toBytes(rowKey), Bytes.toBytes(columnFamily), Bytes.toBytes(checkQualifier), Bytes.toBytes(checkValue), put);
        } catch (IOException e) {
            LOGGER.error("failed to save data to HBase table " + tableName + ", rowKey: " + rowKey + ", data: " + JSON.toJSONString(dataMap), e);
            return false;
        } finally {
            closeTable(table);
        }
    }

    /**
     * Build a Put from a column data map
     * @param rowKey
     * @param columnFamily
     * @param dataMap
     * @return the populated Put
     */
    private static Put getPut(String rowKey, String columnFamily, Map<String, String> dataMap) {
        Put put = new Put(Bytes.toBytes(rowKey));
        if (dataMap != null) {
            for (Map.Entry<String, String> entry : dataMap.entrySet()) {
                put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(entry.getKey()), Bytes.toBytes(entry.getValue()));
            }
        }
        return put;
    }

    /**
     * Get the value of a single HBase column
     * @param tableName
     * @param rowKey
     * @param columnFamily
     * @param columnName
     * @return the value, or null if absent
     */
    public static String getColumnData(String tableName, String rowKey, String columnFamily, String columnName) {
        Table table = null;
        byte[] valueBytes = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Get get = new Get(Bytes.toBytes(rowKey));
            Result result = table.get(get);
            valueBytes = result.getValue(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName));
        } catch (IOException e) {
            LOGGER.error("failed to get data from HBase table " + tableName + ", rowKey: " + rowKey + ", column: " + columnName, e);
        } finally {
            closeTable(table);
        }
        return valueBytes == null ? null : new String(valueBytes);
    }

    /**
     * Get one row
     * @param tableName
     * @param rowKey
     * @return the row as a map
     */
    public static Map<String, Object> getData(String tableName, String rowKey) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Get get = new Get(Bytes.toBytes(rowKey));
            Result result = table.get(get);
            return resultToMap(result, null);
        } catch (IOException e) {
            LOGGER.error("failed to get data from HBase table " + tableName + ", rowKey: " + rowKey, e);
            return new HashMap<>();
        } finally {
            closeTable(table);
        }
    }

    /**
     * Get one row, returning all columns of the given column family
     * @param tableName
     * @param rowKey
     * @return qualifier -> value map for the family
     */
    public static Map<String, String> getData(String tableName, String rowKey, String columnFamily) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Get get = new Get(Bytes.toBytes(rowKey));
            Result result = table.get(get);
            Map<String, Object> resultMap = resultToMap(result, columnFamily);
            return resultMap.get(columnFamily) == null ? new HashMap<>() : (HashMap<String, String>) resultMap.get(columnFamily);
        } catch (IOException e) {
            LOGGER.error("failed to get data from HBase table " + tableName + ", rowKey: " + rowKey, e);
            return new HashMap<>();
        } finally {
            closeTable(table);
        }
    }

    /**
     * Scan all rows with the given row prefix (intended for small result sets)
     * @param tableName
     * @param prefix
     * @return the matching rows
     */
    public static List<Map<String, Object>> scanDataByRowPrefix(String tableName, String prefix) {
        Table table = null;
        List<Map<String, Object>> resultList = new ArrayList<>();
        try {
            table = HBaseUtil.getConn().getTable(TableName.valueOf(tableName));
            Scan scan = new Scan();
            scan.setRowPrefixFilter(Bytes.toBytes(prefix));
            ResultScanner scanner = table.getScanner(scan);
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                resultList.add(resultToMap(result, null));
            }
        } catch (IOException e) {
            LOGGER.error("failed to scan HBase table " + tableName + ", row prefix: " + prefix, e);
        } finally {
            closeTable(table);
        }
        return resultList;
    }

    /**
     * Scan with a Filter between startRow and stopRow, returning all columns of the given column family
     * @param tableName
     * @param columnFamily
     * @param filter
     * @return the matching rows
     */
    public static List<Map<String, String>> scanDataByFilter(String tableName, String columnFamily, String startRow, String stopRow, Filter filter) {
        Table table = null;
        List<Map<String, String>> resultList = new ArrayList<>();
        try {
            table = HBaseUtil.getConn().getTable(TableName.valueOf(tableName));
            Scan scan = new Scan();
            scan.setFilter(filter);
            scan.setStartRow(Bytes.toBytes(startRow));
            scan.setStopRow(Bytes.toBytes(stopRow));
            scan.setCaching(100);
            ResultScanner scanner = table.getScanner(scan);
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                Map<String, Object> resultMap = resultToMap(result, columnFamily);
                resultList.add(resultMap.get(columnFamily) == null ? new HashMap<>() : (HashMap<String, String>) resultMap.get(columnFamily));
            }
        } catch (IOException e) {
            LOGGER.error("failed to scan HBase table " + tableName + ", filter: " + JSON.toJSONString(filter), e);
        } finally {
            closeTable(table);
        }
        return resultList;
    }

    /**
     * Delete a row by rowKey
     * @param tableName
     * @param rowKey
     * @return true if the delete succeeded
     */
    public static boolean deleteData(String tableName, String rowKey) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Delete delete = new Delete(Bytes.toBytes(rowKey));
            table.delete(delete);
            return true;
        } catch (IOException e) {
            LOGGER.error("failed to delete data from HBase table " + tableName + ", rowKey: " + rowKey, e);
            return false;
        } finally {
            closeTable(table);
        }
    }

    /**
     * Close an HBase table
     * @param table
     */
    public static void closeTable(Table table) {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                LOGGER.error("failed to close HBase table " + table.getName(), e);
            }
        }
    }

    /**
     * Convert a Result into a map for easy JSON serialization. If columnFamily is blank all
     * column families are returned; otherwise only the columns of that family are returned.
     * Format: {rowKey=r1, author={name=zhangsan}, article={title=this is title, content=this is content}}
     * @param result
     * @return the row as a nested map
     */
    public static Map<String, Object> resultToMap(Result result, String columnFamily) {
        Map<String, Object> resMap = new HashMap<>();
        if (result == null || result.listCells() == null || result.listCells().isEmpty()) {
            return resMap;
        }
        String rowKey = "";
        for (Cell cell : result.listCells()) {
            // Extract row, family, qualifier, and value from the cell
            rowKey = Bytes.toString(CellUtil.cloneRow(cell));
            String tempFamily = Bytes.toString(CellUtil.cloneFamily(cell));
            String tempQualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            String tempValue = Bytes.toString(CellUtil.cloneValue(cell));
            // Skip cells whose family does not match the requested columnFamily
            if (StringUtils.isNotBlank(columnFamily) && !StringUtils.equals(tempFamily, columnFamily)) {
                continue;
            }
            // Get (or create) the map for this family, then add qualifier -> value
            Map<String, String> familyMap;
            if (resMap.get(tempFamily) == null) {
                familyMap = new HashMap<>();
                resMap.put(tempFamily, familyMap);
            } else {
                familyMap = (HashMap<String, String>) resMap.get(tempFamily);
            }
            familyMap.put(tempQualifier, tempValue);
        }
        resMap.put("rowKey", rowKey);
        return resMap;
    }

    /**
     * Build the stopRow by incrementing the last character of the row string
     * @param row the startRow
     * @return the exclusive stopRow
     */
    public static String stopRowByAddOne(String row) {
        int length = row.length();
        char lastChar = row.charAt(length - 1);
        return row.substring(0, length - 1) + (char) (lastChar + 1);
    }
}
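For reference, a minimal usage sketch of the utility above; the table, row key, and column family names here are made up for illustration:

    Map<String, String> data = new HashMap<>();
    data.put("name", "zhangsan");
    data.put("title", "this is title");
    // Write one row, then read the whole "cf" family back
    HBaseUtil.putData("user_profile", "u_0001", "cf", data);
    Map<String, String> row = HBaseUtil.getData("user_profile", "u_0001", "cf");
    // Prefix scan; stopRowByAddOne("u_0001") would return "u_0002", an exclusive upper bound
    List<Map<String, Object>> rows = HBaseUtil.scanDataByRowPrefix("user_profile", "u_000");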