When creating a table in Phoenix, a primary key must be declared; it is what gets stored as the rowkey in HBase.
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.Properties;

public class Phoenix_create {
    public static void main(String[] args) throws Exception {
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        Properties properties = new Properties();
        properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
        properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");
        Connection connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties);
        // Create the table; the primary key column becomes the HBase rowkey
        PreparedStatement preparedStatement = connection.prepareStatement(
                "create table refund_rate (" +
                "  rowkey bigint primary key," +
                "  window_start TIMESTAMP," +
                "  window_end TIMESTAMP," +
                "  regionName varchar(200)," +
                "  lv varchar(200)," +
                "  rk bigint" +
                ")");
        preparedStatement.execute();
        preparedStatement.close();
        connection.close();
    }
}
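Before wiring up the Flink job, the new table can be sanity-checked over the same JDBC URL. A minimal sketch (the class name and test values are illustrative only); the main gotcha is that Phoenix connections default to auto-commit off, so writes need an explicit commit():

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.sql.Timestamp;
import java.util.Properties;

public class Phoenix_smoke_test {
    public static void main(String[] args) throws Exception {
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        Properties properties = new Properties();
        properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
        properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");
        try (Connection connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties)) {
            // UPSERT is Phoenix's insert-or-update statement
            try (PreparedStatement ps = connection.prepareStatement(
                    "upsert into refund_rate values (?,?,?,?,?,?)")) {
                ps.setLong(1, 1L);                                        // hypothetical test row
                ps.setTimestamp(2, Timestamp.valueOf("2024-01-01 00:00:00"));
                ps.setTimestamp(3, Timestamp.valueOf("2024-01-01 01:00:00"));
                ps.setString(4, "testRegion");
                ps.setString(5, "50.0%");
                ps.setLong(6, 1L);
                ps.executeUpdate();
            }
            connection.commit(); // Phoenix buffers mutations until commit
            // Read the row back to confirm the table is usable
            try (Statement st = connection.createStatement();
                 ResultSet rs = st.executeQuery("select * from refund_rate")) {
                while (rs.next()) {
                    System.out.println(rs.getLong(1) + " | " + rs.getString(4) + " | " + rs.getString(5));
                }
            }
        }
    }
}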
Next comes the Flink job: Flink SQL reads the order data from Kafka, and the result is written to HBase through Phoenix.
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    // interval-join of orders and order details
    // Refund_rate.interverJoinToKafka(env);
    StreamTableEnvironment tenv = StreamTableEnvironment.create(env);

    // TODO <1> Flink SQL tuning options
    TableConfig config = tenv.getConfig();
    // How long idle state is retained
    config.setIdleStateRetention(Duration.ofMinutes(60));
    // Additional tuning parameters
    Configuration configuration = config.getConfiguration();
    // Enable mini-batch: micro-batching trades a bit of latency for throughput,
    // so avoid it if you need ultra-low latency; for aggregation workloads it
    // usually improves performance noticeably and is recommended.
    configuration.setString("table.exec.mini-batch.enabled", "true");
    // Flush interval of each mini-batch
    configuration.setString("table.exec.mini-batch.allow-latency", "5 s");
    // Cap the records buffered per batch to avoid OOM; 5,000 here
    configuration.setString("table.exec.mini-batch.size", "5000");
    // Enable LocalGlobal two-phase aggregation
    configuration.setString("table.optimizer.agg-phase-strategy", "TWO_PHASE");
    // Enable split-distinct to relieve COUNT DISTINCT hotspots
    configuration.setString("table.optimizer.distinct-agg.split.enabled", "true");
    // Number of buckets the first level is hashed into
    configuration.setString("table.optimizer.distinct-agg.split.bucket-num", "1024");
    // Session time zone
    configuration.setString("table.local-time-zone", "Asia/Shanghai");
    // Cache size of TopN operators
    configuration.setString("table.exec.topn.cache-size", "50000");

    // If createTime arrived as a string field, it could instead be computed
    // as TO_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP('createTime'), 'yyyy-MM-dd HH:mm:ss'))
tenv.executeSql("create table kafkaOrder(" +
"orderId BIGINT ," +
"goodsId BIGINT ," +
"goodsName STRING ," +
"isRefund STRING ," +
"regionId BIGINT ," +
"regionName STRING ," +
"cityId BIGINT ," +
"cityName STRING ," +
"createTime TIMESTAMP(3) ," +
"watermark for createTime as createTime - interval '5' second " +
") WITH (" +
"'connector' = 'kafka'," +
"'topic' = 'ware_lsx_orderWide'," +
"'properties.bootstrap.servers' = 'hadoop101:9092,hadoop102:9092,hadoop103:9092', " +
"'properties.group.id' = ' ware_lsx_orderWide '," +
"'scan.startup.mode' = 'earliest-offset'," +
"'format' = 'json'" +
")");
    // tenv.executeSql("desc kafkaOrder").print();
    // tenv.executeSql("select * from kafkaOrder").print();

    // TODO goods refund rate per region / per city
    // tenv.executeSql("select regionName,cityName,count(distinct goodsId) c from kafkaOrder group by regionName,cityName").print();
    Table query = tenv.sqlQuery(
            // rowkey is a constant 0 placeholder here; the sink assigns the real value
            "select 0 rowkey, window_start, window_end, regionName, " +
            "       concat(cast(lvs * 100 as string), '%') lv, cast(rk as int) rk from " +
            " (select window_start, window_end, regionName, lvs, " +
            "         row_number() over(partition by window_start, window_end order by lvs desc) rk from " +
            "  (select window_start, window_end, regionName, round(((ct_rf * 1.0) / ct_all), 1) lvs from " +
            "   (select " +
            "      window_start, window_end, regionName, " +
            // no ELSE branch: non-refunded rows become NULL, which COUNT DISTINCT
            // ignores (an ELSE 0 would sneak one extra distinct value into the count)
            "      count(distinct case when isRefund = '1' then goodsId end) ct_rf, " +
            "      count(distinct goodsId) ct_all " +
            "    from TABLE(TUMBLE(TABLE kafkaOrder, DESCRIPTOR(createTime), INTERVAL '1' hour)) " +
            "    group by window_start, window_end, regionName" +
            "   )" +
            "  )" +
            " ) where rk <= 2");
    // alternative formatting: concat(cast(round((ct_rf / ct_all) * 100, 0) as string), '%')
    DataStream<Tuple2<Boolean, Refund_Bean>> refund_Stream = tenv.toRetractStream(query, Refund_Bean.class);
    refund_Stream.print();
    refund_Stream.addSink(new RichSinkFunction<Tuple2<Boolean, Refund_Bean>>() {
        // monotonically increasing rowkey; only safe because parallelism is 1
        private int a;
        private Connection connection = null;
        private PreparedStatement preparedStatement = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            // Open the Phoenix connection once when the task starts,
            // not once per record
            Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
            Properties properties = new Properties();
            properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
            properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");
            connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties);
            preparedStatement = connection.prepareStatement("upsert into refund_rate values (?,?,?,?,?,?)");
        }

        @Override
        public void invoke(Tuple2<Boolean, Refund_Bean> value, SinkFunction.Context context) throws Exception {
            // f0 == false retracts a previously emitted row; skip those so that
            // retracted results are not written as extra rows
            if (!value.f0) {
                return;
            }
            preparedStatement.setInt(1, a++);
            preparedStatement.setTimestamp(2, value.f1.getWindow_start());
            preparedStatement.setTimestamp(3, value.f1.getWindow_end());
            preparedStatement.setString(4, value.f1.getRegionName());
            preparedStatement.setString(5, value.f1.getLv());
            preparedStatement.setInt(6, value.f1.getRk());
            preparedStatement.executeUpdate();
            connection.commit(); // Phoenix buffers mutations until commit
        }

        @Override
        public void close() throws Exception {
            if (preparedStatement != null) {
                preparedStatement.close();
            }
            if (connection != null) {
                connection.close();
            }
        }
    });
    env.execute();
}
What gets stored here is a metric computed in SQL: the query result is converted to a retract stream of POJOs, and addSink then writes each row to HBase through Phoenix.
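A note on the Boolean in the tuple: toRetractStream emits (true, row) for an insert and (false, row) to retract a row it emitted earlier, which happens whenever the Top 2 of a window changes. That is why the sink above skips messages with f0 == false. A minimal sketch of isolating just the add messages, assuming the refund_Stream from above:

    // Keep only add messages; f0 == false entries undo previously emitted rows
    DataStream<Refund_Bean> upserts = refund_Stream
            .filter(t -> t.f0)
            .map(t -> t.f1)
            .returns(Refund_Bean.class);
    upserts.print("upsert");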
The entity class used along the way:
import java.sql.Timestamp;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@AllArgsConstructor
@NoArgsConstructor
public class Refund_Bean {
    // Field names must match the column aliases of the outer SELECT,
    // since toRetractStream maps columns to POJO fields by name
    private Integer rowkey;
    private Timestamp window_start;
    private Timestamp window_end;
    private String regionName;
    private String lv;
    private Integer rk;
}
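Once the job has been running for a while, the metric can be read back with a plain Phoenix query. A minimal sketch reusing the connection settings from above (the class name is illustrative):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

public class Phoenix_read_back {
    public static void main(String[] args) throws Exception {
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        Properties properties = new Properties();
        properties.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
        properties.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");
        try (Connection connection = DriverManager.getConnection("jdbc:phoenix:hadoop101:2181", properties);
             Statement statement = connection.createStatement();
             // Latest windows first, Top-2 rank order inside each window
             ResultSet rs = statement.executeQuery(
                     "select window_start, window_end, regionName, lv, rk " +
                     "from refund_rate order by window_start desc, rk asc")) {
            while (rs.next()) {
                System.out.println(rs.getTimestamp(1) + " ~ " + rs.getTimestamp(2)
                        + " | " + rs.getString(3) + " | " + rs.getString(4)
                        + " | rank " + rs.getLong(5));
            }
        }
    }
}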