在Flink sql client执行如下数据去重任务报错:java.lang.IllegalArgumentException: Cannot write delete files in a v1 table的解决办法
在Flink sql client执行如下数据去重任务报错:java.lang.IllegalArgumentException: Cannot write delete files in a v1 table
-- Failing version of the job.
-- The CREATE TABLE below has no WITH ('format-version' = '2'), so Iceberg
-- creates a v1 table; the upsert (primary-key) write then needs delete files,
-- which v1 cannot store -> "Cannot write delete files in a v1 table".
create catalog hadoop_catalog with (
    'type' = 'iceberg',
    'catalog-type' = 'hadoop',
    'catalog-name' = 'hadoop_catalog',
    'warehouse' = 'alluxio://xx.xx.xx.xx:19998/iceberg/warehouse/iceberg_db',
    'property-version' = '2',
    'format-version' = '2'  -- catalog-level only; does NOT apply to the table below
);

use catalog hadoop_catalog;

create database if not exists iceberg2iceberg_dwd_db;
use iceberg2iceberg_dwd_db;

-- Missing table-level 'format-version' = '2' -> defaults to a v1 table.
CREATE TABLE if not exists student_deduplication (
    id INT,
    name STRING,
    test STRING,
    score INT,
    process_time TIMESTAMP(3),
    row_num BIGINT,
    PRIMARY KEY (id) NOT ENFORCED
);

-- Deduplicate: keep only the latest record per (name, score) by process_time.
-- Writing into a primary-keyed Iceberg table makes this an upsert, which
-- requires v2 delete files — hence the error on the v1 table above.
INSERT INTO hadoop_catalog.iceberg2iceberg_dwd_db.student_deduplication (id, name, test, score, process_time, row_num)
SELECT id, name, test, score, process_time, row_num
FROM (
    SELECT id, name, test, score, process_time,
           ROW_NUMBER() OVER (PARTITION BY name, score ORDER BY process_time DESC) AS row_num
    FROM hadoop_catalog.mysql_kafka2iceberg_db.student_original
)
WHERE row_num = 1;
通过在建表时加上如下两条属性信息解决:
'property-version' = '2' ,
'format-version' = '2'
添加过后,可执行的完整 SQL 如下:
-- Working version of the job.
-- Identical to the failing version except the CREATE TABLE carries
-- WITH ('format-version' = '2'), so the table is created as Iceberg v2
-- and can store the delete files produced by the primary-key upsert.
create catalog hadoop_catalog with (
    'type' = 'iceberg',
    'catalog-type' = 'hadoop',
    'catalog-name' = 'hadoop_catalog',
    'warehouse' = 'alluxio://xx.xx.xx.xx:19998/iceberg/warehouse/iceberg_db',
    'property-version' = '2',
    'format-version' = '2'
);

use catalog hadoop_catalog;

create database if not exists iceberg2iceberg_dwd_db;
use iceberg2iceberg_dwd_db;

-- Table-level format-version = 2 is the actual fix: row-level deletes
-- (needed for PRIMARY KEY upserts) are only supported by v2 tables.
CREATE TABLE if not exists student_deduplication (
    id INT,
    name STRING,
    test STRING,
    score INT,
    process_time TIMESTAMP(3),
    row_num BIGINT,
    PRIMARY KEY (id) NOT ENFORCED
) WITH (
    'property-version' = '2',
    'format-version' = '2'
);

-- Deduplicate: keep only the latest record per (name, score) by process_time.
INSERT INTO hadoop_catalog.iceberg2iceberg_dwd_db.student_deduplication (id, name, test, score, process_time, row_num)
SELECT id, name, test, score, process_time, row_num
FROM (
    SELECT id, name, test, score, process_time,
           ROW_NUMBER() OVER (PARTITION BY name, score ORDER BY process_time DESC) AS row_num
    FROM hadoop_catalog.mysql_kafka2iceberg_db.student_original
)
WHERE row_num = 1;