Creating a table through the Phoenix API, and writing Kafka data into Phoenix/HBase with Flink SQL

When creating a table in Phoenix you must declare a primary key (it is stored as the rowkey in HBase).

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.Properties;

public class Phoenix_create {

    public static void main(String[] args) throws Exception {

        // Register the Phoenix JDBC driver
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");

        // Enable namespace mapping so Phoenix schemas map to HBase namespaces
        Properties properties = new Properties();
        properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
        properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");

        Connection connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties);

        // Create the table; the primary key column becomes the HBase rowkey
        PreparedStatement preparedStatement = connection.prepareStatement(
                "create table refund_rate (" +
                        "rowkey        bigint primary key," +
                        "window_start  TIMESTAMP," +
                        "window_end    TIMESTAMP," +
                        "regionName    varchar(200)," +
                        "lv            varchar(200)," +
                        "rk            bigint" +
                        ")");

        preparedStatement.execute();

        preparedStatement.close();
        connection.close();

    }

}
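To confirm that the table exists (and, later, to inspect what the Flink job has written), it can be queried over the same Phoenix JDBC connection. This is only a minimal verification sketch, assuming the same cluster addresses as above; the class name Phoenix_query is illustrative.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

// Illustrative helper class; connection settings are copied from Phoenix_create
public class Phoenix_query {

    public static void main(String[] args) throws Exception {
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        Properties properties = new Properties();
        properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
        properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");
        Connection connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties);

        // An empty result just means the Flink job has not written anything yet
        Statement statement = connection.createStatement();
        ResultSet rs = statement.executeQuery("select * from refund_rate");
        while (rs.next()) {
            // columns: rowkey, window_start, window_end, regionName, lv, rk
            System.out.println(rs.getLong(1) + " | " + rs.getTimestamp(2) + " | " + rs.getTimestamp(3)
                    + " | " + rs.getString(4) + " | " + rs.getString(5) + " | " + rs.getLong(6));
        }

        rs.close();
        statement.close();
        connection.close();
    }
}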

Flink SQL reads the data from Kafka, and the result is then written into HBase through Phoenix.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableConfig;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.time.Duration;
import java.util.Properties;

// Wrapping class and imports added so the excerpt compiles; the class name is illustrative.
public class Refund_rate_job {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        //  intervalJoin of the order and order-detail streams (done elsewhere, left commented out here)
        // Refund_rate.interverJoinToKafka(env);


        StreamTableEnvironment tenv = StreamTableEnvironment.create(env);

        // TODO <1> Flink SQL tuning options

        TableConfig config = tenv.getConfig();
        // How long idle state is retained before it is cleaned up
        config.setIdleStateRetention(Duration.ofMinutes(60));

        // Additional optimizer / runtime options
        Configuration configuration = config.getConfiguration();
        // Enable mini-batch processing.
        // Mini-batching trades a little extra latency for higher throughput; it is not recommended
        // when ultra-low latency is required, but for aggregation workloads it usually improves
        // performance significantly, so it is enabled here.
        configuration.setString("table.exec.mini-batch.enabled", "true");
        // How long a batch may wait before it is emitted
        configuration.setString("table.exec.mini-batch.allow-latency", "5 s");
        // Cap the number of records buffered per batch (5000 here) to avoid OOM
        configuration.setString("table.exec.mini-batch.size", "5000");

        // Enable LocalGlobal (two-phase) aggregation
        configuration.setString("table.optimizer.agg-phase-strategy", "TWO_PHASE");
        // Enable Split Distinct to mitigate COUNT DISTINCT hot keys
        configuration.setString("table.optimizer.distinct-agg.split.enabled", "true");
        // Number of buckets used by the first-level split
        configuration.setString("table.optimizer.distinct-agg.split.bucket-num", "1024");

        // Session time zone
        configuration.setString("table.local-time-zone", "Asia/Shanghai");

        // TopN cache size
        configuration.setString("table.exec.topn.cache-size", "50000");




//  as TO_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP('createTime'), 'yyyy-MM-dd HH:mm:ss'))
        tenv.executeSql("create table kafkaOrder(" +
                "orderId BIGINT ," +
                "goodsId BIGINT ," +
                "goodsName STRING ," +
                "isRefund STRING ," +
                "regionId BIGINT ," +
                "regionName STRING ," +
                "cityId BIGINT ," +
                "cityName STRING ," +
                "createTime TIMESTAMP(3) ," +
                "watermark for createTime as createTime - interval '5' second " +
                ") WITH (" +
                "'connector' = 'kafka'," +
                "'topic' = 'ware_lsx_orderWide'," +
                "'properties.bootstrap.servers' = 'hadoop101:9092,hadoop102:9092,hadoop103:9092', " +
                "'properties.group.id' = ' ware_lsx_orderWide '," +
                "'scan.startup.mode' = 'earliest-offset'," +
                "'format' = 'json'" +
                ")");


      //  tenv.executeSql(" desc kafkaOrder").print();

     //  tenv.executeSql("select * from kafkaOrder ").print();


        // TODO  goods refund rate per region and per city

    //    tenv.executeSql(" select regionName,cityName,count(distinct goodsId) c from kafkaOrder group by regionName,cityName  ").print();



        Table query = tenv.sqlQuery("" +
                "select 0 rowkey,window_start ,window_end ,regionName,concat(cast( lvs*100 as char),'%') lv,cast(rk as int) rk from " +
                " (select window_start ,window_end ,regionName,lvs,row_number() over(partition by window_start,window_end order by lvs desc) rk  from " +
                "  (select window_start ,window_end ,regionName ,round( ( (ct_noRf*1.0) / ct_all ),1) lvs from (" +
                "    select " +
                "        window_start ,window_end ,regionName ," +
                "        count(distinct (case when isRefund = '1' then goodsId else 0 end) ) ct_noRf ," +
                "        count(distinct goodsId) ct_all " +
                "        from  TABLE ( TUMBLE(TABLE kafkaOrder,DESCRIPTOR(createTime),INTERVAL '1' hour) )  " +
                "    group by window_start , window_end ,regionName   " +
                "  )  " +
                " ) " +
                ")  where rk <=2  ");


        // concat(cast( round( (ct_noRf/ct_all)*100,0) as char),'%')



        DataStream<Tuple2<Boolean, Refund_Bean>> refund_Stream = tenv.toRetractStream(query, Refund_Bean.class);

        refund_Stream.print();


        refund_Stream.addSink(new RichSinkFunction<Tuple2<Boolean, Refund_Bean>>() {
            // simple in-memory counter used as the rowkey for each upserted row
            private int a;
            private Connection connection = null;
            private PreparedStatement preparedStatement = null;

            @Override
            public void open(Configuration parameters) throws Exception {
                // open the Phoenix connection once per sink instance instead of once per record
                Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
                Properties properties = new Properties();
                properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
                properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");
                connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties);
                preparedStatement = connection.prepareStatement("upsert into refund_rate values (?,?,?,?,?,?)");
            }

            @Override
            public void invoke(Tuple2<Boolean, Refund_Bean> value, Context context) throws Exception {
                preparedStatement.setInt(1, a);
                a++;
                preparedStatement.setTimestamp(2, value.f1.getWindow_start());
                preparedStatement.setTimestamp(3, value.f1.getWindow_end());
                preparedStatement.setString(4, value.f1.getRegionName());
                preparedStatement.setString(5, value.f1.getLv());
                preparedStatement.setInt(6, value.f1.getRk());

                preparedStatement.executeUpdate();
                // Phoenix buffers mutations on the connection; commit flushes them to HBase
                connection.commit();
            }

            @Override
            public void close() throws Exception {
                // close the statement before the connection
                if (preparedStatement != null) {
                    preparedStatement.close();
                }
                if (connection != null) {
                    connection.close();
                }
            }
        });

        env.execute();

    }

}

What is stored here is a single metric computed in SQL. The query result is converted into a retract stream of Refund_Bean objects, and the final addSink writes them into HBase through Phoenix.
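Note that toRetractStream emits Tuple2 records whose f0 field is true for an add/update message and false for a retraction, and the sink above upserts every message under a fresh counter-based rowkey, so retractions also become rows. If only the add messages should reach Phoenix, a filter can be applied right after toRetractStream. A minimal sketch (not part of the original job) using the refund_Stream variable from the code above:

        // Keep only the "add" messages (f0 == true) of the retract stream before sinking.
        // refund_Stream is the DataStream<Tuple2<Boolean, Refund_Bean>> created above.
        DataStream<Tuple2<Boolean, Refund_Bean>> addsOnly = refund_Stream.filter(t -> t.f0);

        // addsOnly.addSink(...) would then never receive retraction records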

The entity class used in between is shown below. Its field names match the column aliases in the query (rowkey, window_start, window_end, regionName, lv, rk), which is what allows toRetractStream to map each result row onto a Refund_Bean:

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.sql.Timestamp;

@Data
@AllArgsConstructor
@NoArgsConstructor
public class Refund_Bean {

    private Integer rowkey;
    private Timestamp window_start;
    private Timestamp window_end;
    private String regionName;
    private String lv;
    private Integer rk;

}

 
