Hudi+MinIO+JuiceFS存算分离
环境准备
官网文档已经很详细了:https://juicefs.com/docs/zh/community/quick_start_guide
按照文档中的步骤操作即可。
JuiceFS集成Kerberos
# Environment for formatting a JuiceFS volume on Kerberos-secured HDFS.
# Every "xxx" below is a placeholder that must be replaced with a real value.
export HADOOP_CONF_DIR=xxx
export KRB5_CONFIG=xxx
export HADOOP_USER_NAME=xxx
# The variable has to carry the base64 text of the keytab itself, so the
# encoding is run through command substitution; a plain double-quoted string
# would export the literal command text instead of its output. Newlines are
# stripped because some base64 implementations wrap their output.
export KRB5KEYTAB_BASE64="$(base64 < xxx.keytab | tr -d '\n')"
export KRB5PRINCIPAL="xxx"
export JUICEFS_DEBUG=debug
# NOTE(review): presumably the password of the metadata engine given in the
# redis:// URL below -- confirm against the JuiceFS documentation.
export META_PASSWORD="xxx"
# Format the volume "redis-hdfs-jfs" with HDFS as the storage type and the
# redis:// URL as the metadata engine.
juicefs format \
    --storage hdfs \
    --access-key xxx \
    --bucket hdfs://xxx \
    "redis://xxx" \
    redis-hdfs-jfs
执行后,如果日志中能看到 hdfs.go 输出的 Kerberos 相关信息,说明认证成功。
注意:storage 为 MinIO 等对象存储时不支持 Kerberos 认证,需要自行修改 minio.go 的代码。
JuiceFS搭建数据湖
# Add the Hadoop dependencies: export the classpath printed by the hadoop CLI
export HADOOP_CLASSPATH="$(hadoop classpath)"
# Start a detached YARN session named flink_sql
# (-s 1, -jm 2048, -tm 4096; see the Flink YARN docs for the option meanings)
bin/yarn-session.sh \
    -s 1 \
    -jm 2048 \
    -tm 4096 \
    -nm flink_sql \
    -d
# Start the Flink SQL client in embedded mode with session "yarn-session"
bin/sql-client.sh embedded -s yarn-session
-- Set the checkpoint interval to 10 seconds
SET execution.checkpointing.interval = 10s;
-- SQL statements
-- Source table backed by the datagen connector: one row per second, with
-- order_number generated as a sequence from 1 to 100000.
CREATE TABLE Orders (
    order_number INT,
    price        DECIMAL(32, 2),
    order_time   TIMESTAMP(3)
) WITH (
    'connector'                 = 'datagen',
    'rows-per-second'           = '1',
    'fields.order_number.kind'  = 'sequence',
    'fields.order_number.start' = '1',
    'fields.order_number.end'   = '100000'
);
-- Hudi MERGE_ON_READ sink table whose files live under a jfs:// path.
-- numtotal serves as both the primary key and the precombine field.
CREATE TABLE jfs_sink (
    ordertotal INT,
    numtotal   INT,
    PRIMARY KEY (numtotal) NOT ENFORCED
) WITH (
    'connector'              = 'hudi',
    'path'                   = 'jfs://mysql-minio-jfs/czs_orders',
    'table.type'             = 'MERGE_ON_READ',
    'write.precombine.field' = 'numtotal'
);
-- Write the aggregate over Orders into the Hudi sink: ordertotal is the
-- constant 1, numtotal is twice the sum of order_number.
INSERT INTO jfs_sink
SELECT
    1 AS ordertotal,
    SUM(order_number) * 2 AS numtotal
FROM Orders;
# Check whether the files have been generated
# NOTE(review): the two listings target different volume names
# (mysql-minio-jfs vs. redis-minio-jfs) -- presumably two alternative
# setups; confirm which one matches the table being written.
hadoop fs -ls jfs://mysql-minio-jfs/czs_orders
hadoop fs -ls jfs://redis-minio-jfs/
-- Variant of the Hudi sink table that points at the redis-minio-jfs volume.
-- NOTE(review): this reuses the table name jfs_sink from the earlier
-- definition -- presumably meant for a fresh session or after dropping the
-- first table; confirm.
-- NOTE(review): the path contains a doubled slash after the volume name;
-- it is kept as written -- verify it is intended.
CREATE TABLE jfs_sink (
    ordertotal INT,
    numtotal   INT,
    PRIMARY KEY (numtotal) NOT ENFORCED
) WITH (
    'connector'              = 'hudi',
    'path'                   = 'jfs://redis-minio-jfs//czs_orders_20230803',
    'table.type'             = 'MERGE_ON_READ',
    'write.precombine.field' = 'numtotal'
);
-- Create the Hive external table
-- Columns mirror the Flink sink table (ordertotal, numtotal); data is read
-- through Hudi's HoodieParquetInputFormat.
-- NOTE(review): LOCATION points at jfs://myjfs/tb2, which differs from the
-- Hudi table paths used in the Flink DDL -- presumably an example path;
-- confirm it matches the table that should be queried.
CREATE EXTERNAL TABLE `flink_hudi_sink` (
    ordertotal INT,
    numtotal   INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS
    INPUTFORMAT  'org.apache.hudi.hadoop.HoodieParquetInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION 'jfs://myjfs/tb2';