Creating a table through the Phoenix API, and writing Kafka data into Phoenix/HBase with Flink SQL

When creating a table in Phoenix you must declare a primary key (it is stored as the rowkey in HBase).

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.Properties;

public class Phoenix_create {

    public static void main(String[] args) throws Exception {

        // Register the Phoenix JDBC driver
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");

        // Enable namespace mapping so Phoenix schemas map to HBase namespaces
        Properties properties = new Properties();
        properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
        properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");

        Connection connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties);

        // Create the table; the primary key column becomes the HBase rowkey
        PreparedStatement preparedStatement = connection.prepareStatement(
                "create table refund_rate (" +
                        "rowkey        bigint primary key," +
                        "window_start  TIMESTAMP," +
                        "window_end    TIMESTAMP," +
                        "regionName    varchar(200)," +
                        "lv            varchar(200)," +
                        "rk            bigint" +
                        ")");

        preparedStatement.execute();

        preparedStatement.close();
        connection.close();

    }

}
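To confirm that the table exists (and, later, to inspect what the Flink job has written), it can be queried over the same Phoenix JDBC connection. This is only a minimal verification sketch, assuming the same cluster addresses as above; the class name Phoenix_query is illustrative.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

// Illustrative helper class; connection settings are copied from Phoenix_create
public class Phoenix_query {

    public static void main(String[] args) throws Exception {
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        Properties properties = new Properties();
        properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
        properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");
        Connection connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties);

        // An empty result just means the Flink job has not written anything yet
        Statement statement = connection.createStatement();
        ResultSet rs = statement.executeQuery("select * from refund_rate");
        while (rs.next()) {
            // columns: rowkey, window_start, window_end, regionName, lv, rk
            System.out.println(rs.getLong(1) + " | " + rs.getTimestamp(2) + " | " + rs.getTimestamp(3)
                    + " | " + rs.getString(4) + " | " + rs.getString(5) + " | " + rs.getLong(6));
        }

        rs.close();
        statement.close();
        connection.close();
    }
}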

Flink SQL reads the data from Kafka, and the result is then written into HBase through Phoenix.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableConfig;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.time.Duration;
import java.util.Properties;

// Wrapping class and imports added so the excerpt compiles; the class name is illustrative.
public class Refund_rate_job {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        //  intervalJoin of the order and order-detail streams (done elsewhere, left commented out here)
        // Refund_rate.interverJoinToKafka(env);


        StreamTableEnvironment tenv = StreamTableEnvironment.create(env);

        // TODO <1> Flink SQL tuning options

        TableConfig config = tenv.getConfig();
        // How long idle state is retained before it is cleaned up
        config.setIdleStateRetention(Duration.ofMinutes(60));

        // Additional optimizer / runtime options
        Configuration configuration = config.getConfiguration();
        // Enable mini-batch processing.
        // Mini-batching trades a little extra latency for higher throughput; it is not recommended
        // when ultra-low latency is required, but for aggregation workloads it usually improves
        // performance significantly, so it is enabled here.
        configuration.setString("table.exec.mini-batch.enabled", "true");
        // How long a batch may wait before it is emitted
        configuration.setString("table.exec.mini-batch.allow-latency", "5 s");
        // Cap the number of records buffered per batch (5000 here) to avoid OOM
        configuration.setString("table.exec.mini-batch.size", "5000");

        // Enable LocalGlobal (two-phase) aggregation
        configuration.setString("table.optimizer.agg-phase-strategy", "TWO_PHASE");
        // Enable Split Distinct to mitigate COUNT DISTINCT hot keys
        configuration.setString("table.optimizer.distinct-agg.split.enabled", "true");
        // Number of buckets used by the first-level split
        configuration.setString("table.optimizer.distinct-agg.split.bucket-num", "1024");

        // Session time zone
        configuration.setString("table.local-time-zone", "Asia/Shanghai");

        // TopN cache size
        configuration.setString("table.exec.topn.cache-size", "50000");




//  as TO_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP('createTime'), 'yyyy-MM-dd HH:mm:ss'))
        tenv.executeSql("create table kafkaOrder(" +
                "orderId BIGINT ," +
                "goodsId BIGINT ," +
                "goodsName STRING ," +
                "isRefund STRING ," +
                "regionId BIGINT ," +
                "regionName STRING ," +
                "cityId BIGINT ," +
                "cityName STRING ," +
                "createTime TIMESTAMP(3) ," +
                "watermark for createTime as createTime - interval '5' second " +
                ") WITH (" +
                "'connector' = 'kafka'," +
                "'topic' = 'ware_lsx_orderWide'," +
                "'properties.bootstrap.servers' = 'hadoop101:9092,hadoop102:9092,hadoop103:9092', " +
                "'properties.group.id' = ' ware_lsx_orderWide '," +
                "'scan.startup.mode' = 'earliest-offset'," +
                "'format' = 'json'" +
                ")");


      //  tenv.executeSql(" desc kafkaOrder").print();

     //  tenv.executeSql("select * from kafkaOrder ").print();


        // TODO  goods refund rate per region and per city

    //    tenv.executeSql(" select regionName,cityName,count(distinct goodsId) c from kafkaOrder group by regionName,cityName  ").print();



        Table query = tenv.sqlQuery("" +
                "select 0 rowkey,window_start ,window_end ,regionName,concat(cast( lvs*100 as char),'%') lv,cast(rk as int) rk from " +
                " (select window_start ,window_end ,regionName,lvs,row_number() over(partition by window_start,window_end order by lvs desc) rk  from " +
                "  (select window_start ,window_end ,regionName ,round( ( (ct_noRf*1.0) / ct_all ),1) lvs from (" +
                "    select " +
                "        window_start ,window_end ,regionName ," +
                "        count(distinct (case when isRefund = '1' then goodsId else 0 end) ) ct_noRf ," +
                "        count(distinct goodsId) ct_all " +
                "        from  TABLE ( TUMBLE(TABLE kafkaOrder,DESCRIPTOR(createTime),INTERVAL '1' hour) )  " +
                "    group by window_start , window_end ,regionName   " +
                "  )  " +
                " ) " +
                ")  where rk <=2  ");


        // concat(cast( round( (ct_noRf/ct_all)*100,0) as char),'%')



        DataStream<Tuple2<Boolean, Refund_Bean>> refund_Stream = tenv.toRetractStream(query, Refund_Bean.class);

        refund_Stream.print();


        refund_Stream.addSink(new RichSinkFunction<Tuple2<Boolean, Refund_Bean>>() {
            // simple in-memory counter used as the rowkey for each upserted row
            private int a;
            private Connection connection = null;
            private PreparedStatement preparedStatement = null;

            @Override
            public void open(Configuration parameters) throws Exception {
                // open the Phoenix connection once per sink instance instead of once per record
                Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
                Properties properties = new Properties();
                properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
                properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");
                connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties);
                preparedStatement = connection.prepareStatement("upsert into refund_rate values (?,?,?,?,?,?)");
            }

            @Override
            public void invoke(Tuple2<Boolean, Refund_Bean> value, Context context) throws Exception {
                preparedStatement.setInt(1, a);
                a++;
                preparedStatement.setTimestamp(2, value.f1.getWindow_start());
                preparedStatement.setTimestamp(3, value.f1.getWindow_end());
                preparedStatement.setString(4, value.f1.getRegionName());
                preparedStatement.setString(5, value.f1.getLv());
                preparedStatement.setInt(6, value.f1.getRk());

                preparedStatement.executeUpdate();
                // Phoenix buffers mutations on the connection; commit flushes them to HBase
                connection.commit();
            }

            @Override
            public void close() throws Exception {
                // close the statement before the connection
                if (preparedStatement != null) {
                    preparedStatement.close();
                }
                if (connection != null) {
                    connection.close();
                }
            }
        });

        env.execute();

    }

}

What is stored here is a single metric computed in SQL. The query result is converted into a retract stream of Refund_Bean objects, and the final addSink writes them into HBase through Phoenix.
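Note that toRetractStream emits Tuple2 records whose f0 field is true for an add/update message and false for a retraction, and the sink above upserts every message under a fresh counter-based rowkey, so retractions also become rows. If only the add messages should reach Phoenix, a filter can be applied right after toRetractStream. A minimal sketch (not part of the original job) using the refund_Stream variable from the code above:

        // Keep only the "add" messages (f0 == true) of the retract stream before sinking.
        // refund_Stream is the DataStream<Tuple2<Boolean, Refund_Bean>> created above.
        DataStream<Tuple2<Boolean, Refund_Bean>> addsOnly = refund_Stream.filter(t -> t.f0);

        // addsOnly.addSink(...) would then never receive retraction records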

The entity class used in between is shown below. Its field names match the column aliases in the query (rowkey, window_start, window_end, regionName, lv, rk), which is what allows toRetractStream to map each result row onto a Refund_Bean:

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.sql.Timestamp;

@Data
@AllArgsConstructor
@NoArgsConstructor
public class Refund_Bean {

    private Integer rowkey;
    private Timestamp window_start;
    private Timestamp window_end;
    private String regionName;
    private String lv;
    private Integer rk;

}

 
