数据去重方案
oracle列转行函数LISTAGG
背景:在订单操作历史表中查询出最早操作改订单的人.
思考:百度搜索oracle列转行函数LISTAGG,然后再截取,达到去重。可是mysql库没有这样的函数。在去搜索mysql的类似方式,发现用GROUP_CONCAT 、GROUP和 ORFER BY 关键字即可实现。发现oracle 去重也有另一个方法去实现行去重 。
1.1 oracle列转行函数LISTAGG用
-- Create table
create table TBL_A
(
col_id NVARCHAR2(32) not null,
col_order_id NVARCHAR2(64),
col_creator NVARCHAR2(64),
col_created_time DATE
)tablespace USERS;
-- Create/Recreate primary, unique and foreign key constraints
alter table TBL_A
add constraint COL_ID primary key (COL_ID)
using index
tablespace USERS;
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1001', 'd1001', '张三', to_date('15-07-2022 16:43:54', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1002', 'd1001', '李四', to_date('16-07-2022 16:44:51', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1003', 'd1001', '王五', to_date('17-07-2022 16:45:42', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1004', 'd1002', '诸葛', to_date('18-07-2022 16:43:54', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1005', 'd1002', '赵云', to_date('16-07-2022 16:44:51', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1006', 'd1002', '曹操', to_date('17-07-2022 16:45:42', 'dd-mm-yyyy hh24:mi:ss'));
listagg 最基本用法
SELECT listagg(to_char(col_creator),',') within group(order by col_created_time) name from tbl_a;
-- 结果: name
-- 张三,李四,赵云,曹操,王五,诸葛
分组用法
SELECT col_order_id,listagg(to_char(col_creator),',') within group(order by col_created_time) NAME from tbl_a group by col_order_id;
-- 结果
row_number col_order_id NAME
-- 1 d1001 张三,李四,王五
-- 2 d1002 赵云,曹操,诸葛
查询出最早操作改订单的人
SELECT M, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME
FROM (SELECT ROW_NUMBER() OVER(PARTITION BY COL_ORDER_ID ORDER BY COL_CREATED_TIME DESC) M,
COL_ORDER_ID,
COL_CREATOR,
COL_CREATED_TIME FROM TBL_A)
WHERE M = 1
mysql 替代方式
GROUP_CONCAT行转列
select col_order_id,GROUP_CONCAT(col_creator order by col_created_time) col_creator
from tbl_a group by col_order_id;
-- 结果
col_order_id col_creator
d1001, "张三,李四,王五"
d1002, "赵云,曹操,诸葛"
实现数据去重
select col_order_id,SUBSTRING_INDEX(GROUP_CONCAT(col_creator order by col_created_time),',',1) col_creator
from tbl_a group by col_order_id;
-- 结果
col_order_id col_creator
d1001, 张三
d1002, 赵云
建表语句
create table tbl_a
(
col_id varchar(32) not null
primary key,
col_order_id varchar(64) null,
col_creator varchar(64) null,
col_created_time datetime null
)
collate = utf8_bin;
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1001', 'd1001', '张三', '2022-07-15 16:43:54');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1002', 'd1001', '李四', '2022-07-16 16:44:51');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1003', 'd1001', '王五', '2022-07-17 16:45:42');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1004', 'd1002', '诸葛', '2022-07-18 16:43:54');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1005', 'd1002', '赵云', '2022-07-16 16:44:51');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1006', 'd1002', '曹操', '2022-07-17 16:45:42');