1.问题背景
因为业务侧存在数据口径和ETL出错的问题,需要对数据历史部分数据进行删除后重新生成并写入的需求。
2.实现步骤
- 创建表
CREATE TABLE IF NOT EXISTS test1
(
`day` DATE NOT NULL COMMENT "default 1980-00-00",
`user_type` VARCHAR(20) COMMENT "",
`product` VARCHAR(20) COMMENT "",
`user_id` VARCHAR(20) COMMENT "",
`value` FLOAT COMMENT "",
`cut` FLOAT COMMENT ""
)
UNIQUE KEY(`day`,`user_type`,`product`,`user_id`)
PARTITION BY RANGE(`day`)
(
PARTITION `p201701` VALUES LESS THAN ("2017-02-01"),
PARTITION `p201702` VALUES LESS THAN ("2017-03-01"),
PARTITION `p201703` VALUES LESS THAN ("2017-04-01")
)
DISTRIBUTED BY HASH(`days`,`user_type`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
- 写入数据
INSERT INTO abtest VALUES ('2017-02-02','a','p_a','user1',20.1,10.1);
- 查询
select * from test1;
- 删除历史数据:根据分区+条件删除
delete from test1 PARTITION p201702 where day = '2017-02-02' and days=1 and user_type='a'