Flink Real-Time Processing Program

The program below uses Flink to consume MySQL binlog entries from Kafka, transform them according to configured table-to-HBase mapping rules, and write the results into HBase.

/**
 * Flink job entry point
 */
public class FlinkKafkaParseJob {

    /**
     * Logger
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(FlinkKafkaParseJob.class);

    /**
     * Consumer Group ID
     */
    private static final String GROUP_ID = "bc713596-bfc8-4ff5-bd50-a5442ed8255";

    public void parseKafkaData() {

        String taskIds = FlinkReadProperties.getTaskIds();
        String zkServer = FlinkReadProperties.getRDRSZookeeperServer();

        Map<String, Object> kafkaInfoMap = new HashMap<>();
        try {
            kafkaInfoMap = getKafkaInfoMap(zkServer, taskIds, GROUP_ID);
        } catch (Exception e) {
            LOGGER.error("failed to load Kafka info from ZooKeeper, exiting", e);
            System.exit(1);
        }

        //Build the Kafka source from the retrieved connection info
        String brokers = MapUtils.getString(kafkaInfoMap, "brokers");
        List<String> topics = (List<String>)MapUtils.getObject(kafkaInfoMap, "topics");
        String groupId = MapUtils.getString(kafkaInfoMap, "groupId");
        String schema = MapUtils.getString(kafkaInfoMap, "schema");

        FlinkKafkaConsumer011<Entry> consumer = KafkaSource.createSource(brokers, topics, groupId, schema);
        consumer.setStartFromGroupOffsets();

        //Register the consumer as the Flink source
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //Enabling checkpointing ties Kafka offset commits to completed checkpoints, giving end-to-end exactly-once consumption: if sink logic fails, the offsets are not committed
        //env.enableCheckpointing(5000);
        DataStreamSource<Entry> kafkaSource = env.addSource(consumer).setParallelism(5);

        try {
            //Process the data with map/flatMap operators
            if (FlinkReadProperties.batchFlag()){
                LOGGER.info("batch flink job run");
                SingleOutputStreamOperator<TransformationEntry> flatMapOutputStream = kafkaSource.flatMap(new EntryFlatMapFunction()).setParallelism(1);
                flatMapOutputStream.map(new EntryMapFunction()).setParallelism(20);
            } else {
                LOGGER.info("single flink job run");
                kafkaSource.map(new DataTransformation()).setParallelism(5);
            }
            env.execute("flink consume kafka data");
        } catch (Exception e) {
            LOGGER.error("flink job error", ExceptionUtils.getFullStackTrace(e));
        }
    }

}
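
KafkaSource.createSource and getKafkaInfoMap are project helpers not shown in the post; getKafkaInfoMap looks up the brokers, topics, group ID, and schema for the configured task IDs in ZooKeeper. For context, here is a minimal sketch of what the source factory could look like, assuming the Flink Kafka 0.11 connector and a hypothetical EntryDeserializationSchema that decodes binlog bytes into Entry objects (neither is from the original code):

public class KafkaSource {

    /**
     * Build a Kafka consumer for binlog entries. A sketch only: EntryDeserializationSchema
     * is assumed to implement DeserializationSchema<Entry> for the given schema name.
     */
    public static FlinkKafkaConsumer011<Entry> createSource(String brokers, List<String> topics,
                                                            String groupId, String schema) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", brokers);
        props.setProperty("group.id", groupId);
        return new FlinkKafkaConsumer011<>(topics, new EntryDeserializationSchema(schema), props);
    }
}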

==========================================================================
/**
 * Flink binlog data processing function
 */
public class EntryFlatMapFunction implements FlatMapFunction<Entry, TransformationEntry> {

    /**
     * Logger (static so the function instance stays serializable)
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(EntryFlatMapFunction.class);

    @Override
    public void flatMap(Entry entry, Collector<TransformationEntry> collector) throws Exception {
        //Convert the MySQL row data into a BinlogRowBean
        BinlogRowBean binlogRowBean = dealOneRow(entry.getBody().getRows().get(0), entry.getHeader().getEventType(), entry.getHeader().getTableName());

        //Get the physical table name
        String tableName = binlogRowBean.getTableName();
        //If this is a sharded table, strip everything after the last underscore "_" to get the logical table name
        String logicTableName;
        if (tableName.matches(Constant.TABLE_SUFFIX_PATTERN)){
            logicTableName = StringUtils.substringBeforeLast(tableName, "_");
        } else {
            logicTableName = tableName;
        }

        //Look up the data-extraction rule defined for this logical table in the configuration
        String tableDefineRule = FlinkReadProperties.getProperty(logicTableName);
        //No MySQL-to-HBase mapping rule is defined for this table
        if (StringUtils.isBlank(tableDefineRule)){
            LOGGER.error("MySQL table {} has no mapping to an HBase table defined", logicTableName);
            return;
        }

        //Per the configured rule, build the rowKey and the business data to store in HBase
        TableFieldRelation tableFieldRelation = JSON.parseObject(tableDefineRule, TableFieldRelation.class);
        String rowKey = RowKeyUtil.generateRowKey(binlogRowBean, tableFieldRelation.getRowKey());
        Map<String, String> hBaseDataMap;
        if (binlogRowBean.getEventType() == EventType.DELETE){
            hBaseDataMap = generateHBaseDataMap(tableFieldRelation, binlogRowBean.getBeforeColumns());
        } else {
            hBaseDataMap = generateHBaseDataMap(tableFieldRelation, binlogRowBean.getAfterColumns());
        }

//        LOGGER.debug("logical table: {}, rowKey: {}", logicTableName, rowKey);

        //Get the transformation for this table from the factory
        AbstractDataTransformation dataTransformation = DataTransformationFactory.getDataTransformation(logicTableName);

        //Generate the TransformationEntry list
        List<TransformationEntry> transformationEntryList = dataTransformation.parseToTransformationEntry(logicTableName, rowKey, hBaseDataMap, binlogRowBean);
        if (CollectionUtils.isEmpty(transformationEntryList)){
            return;
        }
        //Collect
        for (TransformationEntry transformationEntry : transformationEntryList){
            collector.collect(transformationEntry);
        }
    }

    /**
     * Build the HBase column data map: key = HBase column name, value = column value
     * @param tableFieldRelation the configured MySQL-to-HBase field mapping rule
     * @param dbDataMap MySQL column values keyed by column name
     * @return map of HBase column name to value
     */
    public Map<String, String> generateHBaseDataMap(TableFieldRelation tableFieldRelation, Map<String, Object> dbDataMap){
        List<String> columnList = Arrays.asList(tableFieldRelation.getField().split(","));
        return generateHBaseDataMap(columnList, dbDataMap);
    }

    /**
     * Build the HBase column data map: key = HBase column name, value = column value
     * @param columnList list of field expressions, each "dbColumn as hbaseColumn" or a bare column name
     * @param dbDataMap MySQL column values keyed by column name
     * @return map of HBase column name to value
     */
    public Map<String, String> generateHBaseDataMap(List<String> columnList, Map<String, Object> dbDataMap){
        Map<String, String> hbaseDataMap = new HashMap<>();
        for (String columnStr : columnList) {
            //HBase stores columns under their aliases; convert the field expression into a mapping bean
            ColumnAliasMappingBean columnAliasMappingBean = columnAlias(columnStr);

            //Write the HBase column name and value into the data map
            String dbColumnName = columnAliasMappingBean.getDbColumnName();
            String value = dbDataMap.get(dbColumnName) == null ? "" : String.valueOf(dbDataMap.get(dbColumnName));
            hbaseDataMap.put(columnAliasMappingBean.getHbaseColumnName(), value);
        }
        return hbaseDataMap;
    }

    /**
     * Parse a field expression into a column-name mapping bean holding dbColumnName and hbaseColumnName
     * @param fieldStr field expression, e.g. "db_column as hbase_column" or a bare column name
     * @return the column alias mapping
     */
    private ColumnAliasMappingBean columnAlias(String fieldStr){
        if (StringUtils.isBlank(fieldStr)){
            return new ColumnAliasMappingBean("","");
        }

        //With an "as" alias the HBase name differs from the DB name; otherwise the two names are identical
        String[] splitNameArr = fieldStr.split(" as ");
        if (splitNameArr.length == 2){
            return new ColumnAliasMappingBean(splitNameArr[0].trim(), splitNameArr[1].trim());
        }
        return new ColumnAliasMappingBean(splitNameArr[0].trim(), splitNameArr[0].trim());
    }

    public static BinlogRowBean dealOneRow(Row row, EventType eventType, String tableName) {
        //Convert beforeColumns into a name -> value map
        List<Column> columns = row.getBeforeColumns();
        Map<String, Object> beforeColumns = dealColumns(columns);

        //Convert afterColumns into a name -> value map
        columns = row.getAfterColumns();
        Map<String, Object> afterColumns = dealColumns(columns);

        //Compare the two and keep the changed values in changedColumns
        Map<String, Object> changedColumns = compareValue(beforeColumns, afterColumns, eventType);

        BinlogRowBean binlogRowBean = new BinlogRowBean();
        binlogRowBean.setTableName(tableName);
        binlogRowBean.setEventType(eventType);
        binlogRowBean.setBeforeColumns(beforeColumns);
        binlogRowBean.setAfterColumns(afterColumns);
        binlogRowBean.setChangedColumns(changedColumns);

        return binlogRowBean;
    }

    public static Map<String, Object> compareValue(Map<String, Object> beforeColumns, Map<String, Object> afterColumns, EventType eventType) {
        //Keep the after-image values that differ from the before image (only UPDATE events carry both images)
        Map<String, Object> changedColumns = new HashMap<>();
        if (eventType == EventType.UPDATE) {
            afterColumns.forEach((name, value) -> {
                if (!Objects.equals(value, beforeColumns.get(name))) {
                    changedColumns.put(name, value);
                }
            });
        }
        return changedColumns;
    }

    public static Map<String, Object> dealColumns(List<Column> columns) {
        Map<String, Object> map = new HashMap<>();
        if (CollectionUtils.isEmpty(columns)) {
            return map;
        }

        for (Column column : columns) {
            map.put(column.getName().toUpperCase(), column.getValue());
        }
        return map;
    }
}
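
The mapping rule consumed above is a JSON string looked up by logical table name via FlinkReadProperties.getProperty. Judging from how TableFieldRelation is used (getRowKey() plus a comma-separated field list with optional "as" aliases), a configuration entry could look like the following hypothetical example (table and column names invented for illustration):

# logical table name -> JSON mapping rule
t_order={"rowKey":"order_id","field":"order_id as oid,user_id,order_status as status,update_time"}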

======================================================================================

/**
 * HBase utility class for create/read/update/delete operations on HBase tables
 */
public class HBaseUtil {

    private static final Logger LOGGER = LoggerFactory.getLogger(HBaseUtil.class);

    private static final String HBASE_ZOOKEEPER_QUORUM = "hbase.zookeeper.quorum";
    private static final String HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT = "hbase.zookeeper.property.clientPort";
    private static final String ZOOKEEPER_ZNODE_PARENT = "zookeeper.znode.parent";
    private static final String HADOOP_USER_NAME = "hadoop.user.name";

    private static Connection conn;
    private static Properties properties;
    static {
        properties = PropertyUtil.getInstance().getProperties();
        conn = initHBaseConnection(properties);
    }

    public static Connection getConn() {
        return conn;
    }

    /**
     * Initialize the HBase Connection
     * @param properties HBase client configuration
     * @return the Connection, or null if the connection attempt fails
     */
    public static Connection initHBaseConnection(Properties properties){
        Configuration config = HBaseUtil.initHBaseConfiguration(properties);
        User user = User.create(UserGroupInformation.createRemoteUser(properties.getProperty("hbase.client.user")));
        try {
            return ConnectionFactory.createConnection(config, user);
        } catch (IOException e) {
            LOGGER.error("failed to connect to HBase cluster: " + properties.getProperty(HBASE_ZOOKEEPER_QUORUM), e);
            return null;
        }
    }

    /**
     * Initialize the HBase Configuration
     * @param properties HBase client configuration
     * @return the populated Configuration
     */
    public static Configuration initHBaseConfiguration(Properties properties){
        Configuration hconf = HBaseConfiguration.create();

        hconf.set(HConstants.ZOOKEEPER_QUORUM, properties.getProperty(HBASE_ZOOKEEPER_QUORUM));
        hconf.set(HConstants.ZOOKEEPER_CLIENT_PORT, properties.getProperty(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT));
        hconf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, properties.getProperty(ZOOKEEPER_ZNODE_PARENT));
        hconf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 120000);
        hconf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 180000);
        hconf.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 240000);
        hconf.set(HADOOP_USER_NAME, properties.getProperty("hbase.client.user"));

        return hconf;
    }

    /**
     * Put a single column value
     * @param tableName HBase table name
     * @param rowKey row key
     * @param columnFamily column family
     * @param qualifier column qualifier
     * @param data column value
     * @return true on success
     */
    public static boolean putData(String tableName, String rowKey, String columnFamily,String qualifier,String data){
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(qualifier), Bytes.toBytes(data));
            table.put(put);
            return true;
        } catch (IOException e) {
            LOGGER.error("failed to put data into HBase table " + tableName + ", rowKey: " + rowKey + ", data: " + data, e);
            return false;
        } finally {
            closeTable(table);
        }
    }

    /**
     * Put a map of column values into an HBase table
     * @param tableName HBase table name
     * @param rowKey row key
     * @param columnFamily column family
     * @param dataMap map of qualifier -> value
     */
    public static void putData(String tableName, String rowKey, String columnFamily, Map<String,String> dataMap) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Put put = getPut(rowKey, columnFamily, dataMap);
            table.put(put);
        } catch (IOException e) {
            LOGGER.error("failed to put data into HBase table " + tableName + ", rowKey: " + rowKey + ", data: " + JSON.toJSONString(dataMap), e);
        } finally {
            closeTable(table);
        }
    }

    /**
     * Conditional put (check-and-put) used as an optimistic lock: the put is applied only if
     * the current value of the check column equals checkValue
     * @param tableName HBase table name
     * @param rowKey row key
     * @param columnFamily column family
     * @param checkQualifier qualifier whose current value is checked
     * @param checkValue expected current value
     * @param dataMap map of qualifier -> value to write
     * @return true if the check passed and the put was applied
     */
    public static boolean putDataWithLock(String tableName, String rowKey, String columnFamily, String checkQualifier, String checkValue ,Map<String,String> dataMap){
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Put put = getPut(rowKey, columnFamily, dataMap);
            return table.checkAndPut(Bytes.toBytes(rowKey), Bytes.toBytes(columnFamily), Bytes.toBytes(checkQualifier), Bytes.toBytes(checkValue), put);
        } catch (IOException e) {
            LOGGER.error("failed to checkAndPut into HBase table " + tableName + ", rowKey: " + rowKey + ", data: " + JSON.toJSONString(dataMap), e);
            return false;
        } finally {
            closeTable(table);
        }
    }

    /**
     * Build a Put from the data map
     * @param rowKey row key
     * @param columnFamily column family
     * @param dataMap map of qualifier -> value
     * @return the assembled Put
     */
    private static Put getPut(String rowKey, String columnFamily, Map<String,String> dataMap){
        Put put = new Put(Bytes.toBytes(rowKey));
        if(dataMap != null){
            Set<Map.Entry<String,String>> set = dataMap.entrySet();
            for(Map.Entry<String,String> entry : set){
                String key = entry.getKey();
                String value = entry.getValue();
                put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(key), Bytes.toBytes(value));
            }
        }
        return put;
    }

    /**
     * Get the value of a single HBase column
     * @param tableName HBase table name
     * @param rowKey row key
     * @param columnFamily column family
     * @param columnName column qualifier
     * @return the column value, or null if absent
     */
    public static String getColumnData(String tableName, String rowKey, String columnFamily, String columnName) {
        Table table = null;
        byte[] valueBytes = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Get get = new Get(Bytes.toBytes(rowKey));
            Result result =table.get(get);
            valueBytes = result.getValue(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName));
        } catch (IOException e) {
            LOGGER.error("failed to get data from HBase table " + tableName + ", rowKey: " + rowKey + ", column: " + columnName, e);
        } finally {
            closeTable(table);
        }
        if(valueBytes == null){
            return null;
        } else {
            return new String(valueBytes);
        }
    }

    /**
     * Get one row
     * @param tableName HBase table name
     * @param rowKey row key
     * @return nested map of family -> (qualifier -> value), plus the rowKey
     */
    public static Map<String, Object> getData(String tableName, String rowKey) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Get get = new Get(Bytes.toBytes(rowKey));
            Result result = table.get(get);
            return resultToMap(result, null);
        } catch (IOException e) {
            LOGGER.error("failed to get data from HBase table " + tableName + ", rowKey: " + rowKey, e);
            return new HashMap<>();
        } finally {
            closeTable(table);
        }
    }

    /**
     * Get one row, returning all columns of the given column family
     * @param tableName HBase table name
     * @param rowKey row key
     * @param columnFamily column family
     * @return map of qualifier -> value for that family
     */
    public static Map<String, String> getData(String tableName, String rowKey, String columnFamily) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Get get = new Get(Bytes.toBytes(rowKey));
            Result result = table.get(get);
            Map resultMap = resultToMap(result, columnFamily);
            return resultMap.get(columnFamily) == null ? new HashMap<>() : (HashMap<String, String>)resultMap.get(columnFamily);
        } catch (IOException e) {
            LOGGER.error("failed to get data from HBase table " + tableName + ", rowKey: " + rowKey, e);
            return new HashMap<>();
        } finally {
            closeTable(table);
        }
    }

    /**
     * Scan all rows with the given row-key prefix (intended for small result sets)
     * @param tableName HBase table name
     * @param prefix row-key prefix
     * @return list of row maps
     */
    public static List<Map<String, Object>> scanDataByRowPrefix(String tableName, String prefix){
        Table table = null;
        List<Map<String, Object>> resultList = new ArrayList<>();
        try {
            table = HBaseUtil.getConn().getTable(TableName.valueOf(tableName));
            Scan scan = new Scan();
            scan.setRowPrefixFilter(Bytes.toBytes(prefix));
            //close the scanner with try-with-resources; a return inside finally would also swallow exceptions
            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result result = scanner.next(); result != null; result = scanner.next()){
                    resultList.add(resultToMap(result, null));
                }
            }
        } catch (IOException e) {
            LOGGER.error("failed to scan HBase table " + tableName + ", row prefix: " + prefix, e);
        } finally {
            closeTable(table);
        }
        return resultList;
    }

    /**
     * Scan a row-key range with a Filter, returning all columns of the given column family
     * @param tableName HBase table name
     * @param columnFamily column family
     * @param startRow inclusive start row key
     * @param stopRow exclusive stop row key
     * @param filter scan filter
     * @return list of qualifier -> value maps, one per row
     */
    public static List<Map<String, String>> scanDataByFilter(String tableName, String columnFamily, String startRow, String stopRow, Filter filter){
        Table table = null;
        List<Map<String, String>> resultList = new ArrayList<>();
        try {
            table = HBaseUtil.getConn().getTable(TableName.valueOf(tableName));
            Scan scan = new Scan();
            scan.setFilter(filter);
            scan.setStartRow(Bytes.toBytes(startRow));
            scan.setStopRow(Bytes.toBytes(stopRow));
            scan.setCaching(100);
            //close the scanner with try-with-resources; a return inside finally would also swallow exceptions
            try (ResultScanner scanner = table.getScanner(scan)) {
                Map resultMap;
                for (Result result = scanner.next(); result != null; result = scanner.next()){
                    resultMap = resultToMap(result, columnFamily);
                    resultList.add(resultMap.get(columnFamily) == null ? new HashMap<>() : (HashMap<String, String>)resultMap.get(columnFamily));
                }
            }
        } catch (IOException e) {
            LOGGER.error("failed to scan HBase table " + tableName + ", filter: " + JSON.toJSONString(filter), e);
        } finally {
            closeTable(table);
        }
        return resultList;
    }

    /**
     * Delete a row by rowKey
     * @param tableName HBase table name
     * @param rowKey row key
     * @return true on success
     */
    public static boolean deleteData(String tableName, String rowKey) {
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Delete delete = new Delete(Bytes.toBytes(rowKey));
            table.delete(delete);
            return true;
        } catch (IOException e) {
            LOGGER.error("failed to delete data from HBase table " + tableName + ", rowKey: " + rowKey, e);
            return false;
        } finally {
            closeTable(table);
        }
    }

    /**
     * Close an HBase Table
     * @param table the table to close, may be null
     */
    public static void closeTable(Table table){
        if (table != null){
            try {
                table.close();
            } catch (IOException e) {
                LOGGER.error("failed to close HBase table " + table.getName(), e);
            }
        }
    }


    /**
     * Convert a Result into a map, convenient for returning JSON. If columnFamily is blank, all
     * column families are returned; otherwise only the columns of that family are returned.
     * Format: {rowKey=r1, author={name=zhangsan}, article={title=this is title, content=this is content}}
     * @param result the HBase Result
     * @param columnFamily optional column family filter, may be blank
     * @return nested map of family -> (qualifier -> value), plus the rowKey
     */
    public static Map<String, Object> resultToMap(Result result, String columnFamily) {
        Map<String, Object> resMap = new HashMap<>();
        if (result == null || result.listCells() == null || result.listCells().isEmpty()){
            return resMap;
        }
        String rowKey = "";
        for (Cell cell : result.listCells()) {
            //获取Row Family Qualifier Value
            String tempRow = Bytes.toString(CellUtil.cloneRow(cell));
            //rowKey 赋值
            rowKey = tempRow;

            String tempFamily = Bytes.toString(CellUtil.cloneFamily(cell));
            String tempQualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            String tempValue = Bytes.toString(CellUtil.cloneValue(cell));

            //如果columnFamily 不为空,并且列族不匹配则跳过
            if (StringUtils.isNotBlank(columnFamily) && !StringUtils.equals(tempFamily, columnFamily)){
                continue;
            }

            //从resMap中获取family Map,然后将qualifier和value以键值对的形式放入family map中
            Map<String, String> familyMap;
            if (resMap.get(tempFamily) == null){
                familyMap = new HashMap<>();
                resMap.put(tempFamily,familyMap);
            } else {
                familyMap = (HashMap<String, String>)resMap.get(tempFamily);
            }
            familyMap.put(tempQualifier,tempValue);

        }
        resMap.put("rowKey", rowKey);

        return resMap;
    }

    /**
     * Build an exclusive stopRow by incrementing the last character of the row key,
     * e.g. stopRowByAddOne("user_123") returns "user_124"
     * @param row the start row key (must be non-empty)
     * @return the stop row key
     */
    public static String stopRowByAddOne(String row){
        int length = row.length();
        char lastChar = row.charAt(length - 1);
        return row.substring(0, length - 1) + (char)(lastChar + 1);
    }

}
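
A quick usage sketch of HBaseUtil. The table name, column family, and values below are invented for illustration, and the connection properties are assumed to point at a reachable cluster:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class HBaseUtilDemo {

    public static void main(String[] args) {
        //Hypothetical table "t_order_index" with column family "cf"; adjust to your schema
        Map<String, String> dataMap = new HashMap<>();
        dataMap.put("status", "PAID");
        dataMap.put("amount", "99.00");
        HBaseUtil.putData("t_order_index", "20240101_0001", "cf", dataMap);

        //Read a single column back
        String status = HBaseUtil.getColumnData("t_order_index", "20240101_0001", "cf", "status");

        //Scan all rows for one day by row-key prefix
        List<Map<String, Object>> rows = HBaseUtil.scanDataByRowPrefix("t_order_index", "20240101");
        System.out.println(status + ", rows: " + rows.size());
    }
}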