Flink 1.14.3 integration with Hive and HBase

1. Download Flink 1.14.3

2. Configuration

Configure the system environment variables in /etc/profile and Flink's configuration file flink-conf.yaml.

Add the following to /etc/profile (this assumes the JDK and Hadoop are already configured):

# Flink config
export FLINK_HOME=/opt/module/flink-1.14.3
export HADOOP_CLASSPATH=`hadoop classpath`
export PATH=$PATH:$FLINK_HOME/bin
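
After editing, reload the profile so the variables take effect in the current shell:

source /etc/profile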

Configure flink-conf.yaml as follows:

# Each TaskManager provides 2 task slots
taskmanager.numberOfTaskSlots: 2
# Add this line
classloader.check-leaked-classloader: false

Also add the following environment variable at the top (first line) of bin/config.sh:

export HADOOP_CLASSPATH=`hadoop classpath`
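
The rest of this walkthrough also assumes the Flink SQL connector bundles for Hive, HBase, and Kafka are present in Flink's lib/ directory. The jar names below are assumptions for Flink 1.14.3 with Scala 2.12; check the exact artifacts against Maven Central and your cluster versions:

# assumed connector bundle names; adjust versions to your cluster
cp flink-sql-connector-hive-3.1.2_2.12-1.14.3.jar $FLINK_HOME/lib/
cp flink-sql-connector-hbase-2.2_2.12-1.14.3.jar $FLINK_HOME/lib/
cp flink-sql-connector-kafka_2.12-1.14.3.jar $FLINK_HOME/lib/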

3. Startup configuration

Create a new sql-conf.sql init file under conf/ that configures a HiveCatalog:

CREATE CATALOG myhive WITH (
    'type' = 'hive',
    'default-database' = 'default',
    'hive-conf-dir' = '/usr/hdp/3.1.4.0-315/hive/conf',
    'hadoop-conf-dir'='/usr/hdp/3.1.4.0-315/hadoop/etc/hadoop/'
);
-- set the HiveCatalog as the current catalog of the session
USE CATALOG myhive;

4. Startup

# Note: switch to the hive user
su hive

# Run the following commands from the flink-1.14.3 directory
bin/yarn-session.sh -s 2 -jm 1024 -tm 2048 -nm flinkConnect -qu flink -d

bin/sql-client.sh embedded -i conf/sql-conf.sql -s yarn-session
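
Once the client is up, a quick sanity check that the init file took effect; myhive should be the current catalog and the Hive databases should be visible:

SHOW CATALOGS;
SHOW DATABASES;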

5. Session settings

-- Set the SQL dialect to Hive
set table.sql-dialect=hive;

-- Display query results in a Hive-like tableau format
set sql-client.execution.result-mode = tableau;

6. Testing

-- Create a Hive table (requires the Hive dialect set above)
CREATE TABLE hive_table (
   user_id STRING,
   order_amount DOUBLE
 ) PARTITIONED BY (dt STRING, hr STRING) STORED AS parquet TBLPROPERTIES (
   'partition.time-extractor.timestamp-pattern'='$dt $hr:00:00',
   'sink.partition-commit.trigger'='partition-time',
   'sink.partition-commit.delay'='1 h',
   'sink.partition-commit.policy.kind'='metastore,success-file'
 );
 
-- Create the HBase table. In the HBase shell:
create 'flink_test22', {NAME => 'info', COMPRESSION => 'SNAPPY', TTL => 'FOREVER', VERSIONS => 3}
put 'flink_test22','1004','info:sex','male'
put 'flink_test22','1004','info:age','18'
put 'flink_test22','1004','info:name','Tom'
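
A scan in the HBase shell confirms the test row landed:

scan 'flink_test22'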
-- Map it in Flink SQL (connector DDL uses the default dialect: set table.sql-dialect=default;)
CREATE TABLE Test_P (
  rowkey STRING,
  info ROW<age STRING,name STRING,sex STRING>,
  PRIMARY KEY (rowkey) NOT ENFORCED
 ) WITH (
  'connector' = 'hbase-2.2',
  'table-name' = 'flink_test22',
  'zookeeper.quorum' = 'tp001:2181,tp002:2181,tp003:2181',
  'zookeeper.znode.parent' = '/hbase-unsecure'
 );
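
The source table htable_test2 used in the following statements is never defined in this walkthrough; presumably it maps a second HBase table with the same info column family. A minimal sketch under that assumption (the underlying table name 'flink_test2' is a placeholder):

CREATE TABLE htable_test2 (
  rowkey STRING,
  info ROW<age STRING,name STRING,sex STRING>,
  PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
  'connector' = 'hbase-2.2',
  'table-name' = 'flink_test2',  -- placeholder HBase table name
  'zookeeper.quorum' = 'tp001:2181,tp002:2181,tp003:2181',
  'zookeeper.znode.parent' = '/hbase-unsecure'
);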
-- Copy data from one HBase-backed table into another; with the definition
-- above, the info column family can be selected as a whole row
insert into Test_P select rowkey, info from htable_test2;

-- Create a Kafka streaming table (connector DDL again uses the default dialect).
-- A sample message on the topic looks like:
-- {"uid":"1001","shoid":"1036","money":"111","ts":"1644477567913"}
CREATE TABLE kafka_table_test (
  `uid` STRING,
  `shoid` STRING,
  `money` STRING,
  `ts` TIMESTAMP(3) METADATA FROM 'timestamp'
) WITH (
  'connector' = 'kafka',
  'topic' = 'kafka_table_test',
  'properties.bootstrap.servers' = 'tp001:6667,tp002:6667,tp003:6667',
  'properties.group.id' = 'testGroup',
  'scan.startup.mode' = 'earliest-offset',
  'format' = 'json'
);
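
To get the sample record onto the topic you can use the console producer that ships with Kafka, then paste the sample JSON message shown above. The script location is an assumption (on HDP it typically lives under /usr/hdp/current/kafka-broker/bin):

kafka-console-producer.sh --broker-list tp001:6667 --topic kafka_table_test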

-- Create a Hive table for the Kafka data to land in (switch back to the Hive
-- dialect first: set table.sql-dialect=hive;)
CREATE TABLE kafka_table_sink_hive (
   uid STRING,
   shoid STRING,
   money STRING,
   ts STRING
 ) PARTITIONED BY (dt STRING, hr STRING) STORED AS parquet TBLPROPERTIES (
   'partition.time-extractor.timestamp-pattern'='$dt $hr:00:00',
   'sink.partition-commit.trigger'='partition-time',
   'sink.partition-commit.delay'='1 h',
   'sink.partition-commit.policy.kind'='metastore,success-file'
 );
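
With source and sink defined, a continuous INSERT moves Kafka records into the partitioned Hive table. A minimal sketch; deriving dt and hr from the event timestamp this way is an assumption about the intended partitioning:

INSERT INTO kafka_table_sink_hive
SELECT uid, shoid, money, CAST(ts AS STRING),
       DATE_FORMAT(ts, 'yyyy-MM-dd') AS dt,  -- assumed partition derivation
       DATE_FORMAT(ts, 'HH') AS hr
FROM kafka_table_test;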
-- Insert test rows into the Hive table
insert into hive_table select '002',26,'2022-02-09','13';
insert into hive_table select '1001',25,'2022-02-10','11';
insert into hive_table select '1002',27,'2022-02-10','12';

-- Join the Hive table with the HBase table
select *
from hive_table a
left join htable_test2 b
on a.user_id = b.rowkey;
-- Join the Kafka table with the Hive table
select *
from kafka_table_test a
left join hive_table b
on a.uid = b.user_id;

-- Join the Kafka table with the HBase table
select *
from kafka_table_test a
left join htable_test2 b
on a.uid = b.rowkey;

-- Aggregate: row count per dt partition
select dt,count(1) from hive_table group by dt;

# Data file (testData/score.txt):
0001,zhangsan,99,98,100,school1,class1
0002,lisi,59,89,79,school2,class1
0003,wangwu,89,99,100,school3,class1
0004,zhangsan2,99,98,100,school1,class1
0005,lisi2,59,89,79,school2,class1
0006,wangwu2,89,99,100,school3,class1

-- Create an ordinary (non-ACID) Hive table
create table score1
(id string comment 'ID', 
name string comment 'name',  
Chinese double comment 'Chinese',
English double comment 'English',
math double comment 'math',
school string comment 'school',
class string comment 'class')
comment 'score1'  
row format delimited fields terminated by ','
stored as textfile;

# Put the data file into HDFS
hadoop fs -put /opt/module/flink-1.14.3/testData/score.txt /flink1.14-test-data

-- Load the local file into the table
load data local inpath '/opt/module/flink-1.14.3/testData/score.txt' into table score1;
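
A quick read-back to confirm the load:

select count(1) from score1;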


-- Insert data into the table directly

1. Hive SQL syntax (IDs quoted so the leading zeros survive into the string column):
insert into table score1 values('0006','wangwu2',89,99,100,'school3','class1'),('0005','lisi2',59,89,79,'school2','class1');

2. Flink SQL syntax:
INSERT INTO score1 SELECT '0004','zhangsan2',99,98,100,'school1','class1';
-- Note: Flink SQL does not support operations on ACID (transactional) Hive tables.