mysql 去重主表数据,两张表结构相同数据(百万级别)
每张表里大概130w数据,测试全部重复的情况下,去重
bank_flow 表
bank_flow_cope 表
表结构
CREATE TABLE yz_bank_flow_YXSF01
(
bd_id
int(20) NOT NULL AUTO_INCREMENT COMMENT ‘ID’,
case_no
varchar(10) DEFAULT NULL COMMENT ‘案件编号’,
u_id
varchar(64) DEFAULT NULL COMMENT ‘对应的唯一ID’,
cfr_id
varchar(10) DEFAULT NULL COMMENT ‘案件关系对应表ID’,
id_card_no
varchar(32) DEFAULT NULL COMMENT ‘证件号’,
card_no
varchar(32) DEFAULT ‘’ COMMENT ‘卡号’,
account_no
varchar(32) DEFAULT ‘’ COMMENT ‘账号’,
account_name
varchar(100) DEFAULT ‘’ COMMENT ‘账户名’,
trade_account_no
varchar(50) DEFAULT ‘’ COMMENT ‘交易账户’,
trade_account_name
varchar(150) DEFAULT ‘’ COMMENT ‘交易账户名’,
money
decimal(20,2) DEFAULT NULL COMMENT ‘金额’,
balance
decimal(20,2) DEFAULT NULL COMMENT ‘余额’,
trade_date
datetime DEFAULT NULL COMMENT ‘交易日期’,
digest
text COMMENT ‘摘要’,
remark
text COMMENT ‘备注’,
login_ip
varchar(50) DEFAULT NULL COMMENT ‘登录ID’,
login_mac
varchar(50) DEFAULT NULL COMMENT ‘登录物理地址’,
flow_id
varchar(100) DEFAULT NULL COMMENT ‘交易流水号’,
citation_no
varchar(32) DEFAULT NULL COMMENT ‘传票号’,
log_no
varchar(32) DEFAULT NULL COMMENT ‘日志号’,
voucher_no
varchar(32) DEFAULT NULL COMMENT ‘凭证号’,
teller_no
varchar(32) DEFAULT NULL COMMENT ‘交易柜员号’,
trade_type
varchar(3) DEFAULT NULL COMMENT ‘交易类型(in:收入 out:支出)’,
id
int(8) DEFAULT NULL COMMENT ‘id’,
trade_account_belong
varchar(200) DEFAULT NULL COMMENT ‘交易账号银行归属’,
card_type
varchar(4) DEFAULT NULL COMMENT ‘账号类型’,
open_bank
varchar(50) DEFAULT NULL COMMENT ‘开户网点’,
classify
varchar(2) DEFAULT NULL COMMENT ‘归类’,
trade_bank_name
varchar(50) DEFAULT NULL COMMENT ‘交易行名称’,
import_no
varchar(32) DEFAULT NULL COMMENT ‘导入批次号’,
import_time
datetime DEFAULT NULL COMMENT ‘导入时间’,
cash_tag
varchar(10) DEFAULT NULL COMMENT ‘现金标志’,
PRIMARY KEY (bd_id
),
KEY bank_flow_YXSF01_card_no
(card_no
) USING BTREE,
KEY bank_flow_YXSF01_id_card_no
(id_card_no
) USING BTREE,
KEY bank_flow_YXSF01_trade_account_no
(trade_account_no
) USING BTREE,
KEY bank_flow_YXSF01_login_ip
(login_ip
) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=17200239 DEFAULT CHARSET=utf8mb4 COMMENT=‘银行流水表’;
CREATE TABLE yz_bank_flow_all_add
(
bd_id
bigint(20) NOT NULL AUTO_INCREMENT COMMENT ‘ID’,
case_no
varchar(10) DEFAULT NULL COMMENT ‘案件编号’,
u_id
varchar(64) DEFAULT NULL COMMENT ‘对应的唯一ID’,
cfr_id
varchar(10) DEFAULT NULL COMMENT ‘案件关系对应表ID’,
id_card_no
varchar(32) DEFAULT NULL COMMENT ‘证件号’,
card_no
varchar(32) DEFAULT NULL COMMENT ‘卡号’,
account_no
varchar(32) DEFAULT NULL COMMENT ‘账号’,
account_name
varchar(100) DEFAULT NULL COMMENT ‘账户名’,
trade_account_no
varchar(50) DEFAULT NULL COMMENT ‘交易账户’,
trade_account_name
varchar(150) DEFAULT NULL COMMENT ‘交易账户名’,
money
decimal(20,2) DEFAULT NULL COMMENT ‘金额’,
balance
decimal(20,2) DEFAULT NULL COMMENT ‘余额’,
trade_date
datetime DEFAULT NULL COMMENT ‘交易日期’,
digest
longtext COMMENT ‘摘要’,
remark
longtext COMMENT ‘备注’,
login_ip
varchar(50) DEFAULT NULL COMMENT ‘登录ID’,
login_mac
varchar(50) DEFAULT NULL COMMENT ‘登录物理地址’,
flow_id
varchar(100) DEFAULT NULL COMMENT ‘交易流水号’,
citation_no
varchar(32) DEFAULT NULL COMMENT ‘传票号’,
log_no
varchar(32) DEFAULT NULL COMMENT ‘日志号’,
voucher_no
varchar(32) DEFAULT NULL COMMENT ‘凭证号’,
teller_no
varchar(32) DEFAULT NULL COMMENT ‘交易柜员号’,
trade_type
varchar(3) DEFAULT NULL COMMENT ‘交易类型(in:收入 out:支出)’,
id
bigint(20) DEFAULT NULL COMMENT ‘id’,
trade_account_belong
varchar(200) DEFAULT NULL COMMENT ‘交易账号银行归属’,
card_type
varchar(4) DEFAULT NULL COMMENT ‘账号类型’,
open_bank
varchar(50) DEFAULT NULL COMMENT ‘开户网点’,
classify
varchar(2) DEFAULT NULL COMMENT ‘归类’,
trade_bank_name
varchar(50) DEFAULT NULL COMMENT ‘交易行名称’,
import_no
varchar(32) DEFAULT NULL COMMENT ‘导入批次号’,
import_time
datetime DEFAULT NULL COMMENT ‘导入时间’,
cash_tag
varchar(10) DEFAULT NULL COMMENT ‘现金标志’,
PRIMARY KEY (bd_id
) USING BTREE,
KEY bank_flow_card_no
(card_no
),
KEY bank_flow_id_card_no
(id_card_no
),
KEY bank_login_ip
(login_ip
),
KEY case_no
(case_no
) USING BTREE,
KEY trade_account_no
(trade_account_no
),
KEY account_no
(account_no
),
KEY trade_date
(trade_date
),
KEY money
(money
)
) ENGINE=InnoDB AUTO_INCREMENT=21657855 DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC COMMENT=‘银行流水表’;
得到两张表的并集,查出主表在并集中出现的主键id
SELECT a.bd_id from yz_bank_flow_YXSF01 a STRAIGHT_JOIN yz_bank_flow_all_add b on a.account_no = b.account_no
AND a.card_no = b.card_no
AND a.trade_account_no = b.trade_account_no
AND a.money = b.money
AND a.trade_date = b.trade_date
AND a.trade_type = b.trade_type
//多重复去重
SELECT a.bd_id from yz_bank_flow_YXSF01 a STRAIGHT_JOIN
(SELECT * from yz_bank_flow_all_add where yz_bank_flow_all_add.bd_id in
(select max(bd_id) as id
from yz_bank_flow_all_add GROUP BY account_no,trade_account_no,money,trade_date,card_no) ) b on a.account_no = b.account_no
And a.card_no = b.card_no
AND a.trade_account_no = b.trade_account_no
AND a.money = b.money
AND a.trade_date = b.trade_date
AND a.trade_type = b.trade_type
AND b.import_no=#{importNo} and b.case_no =#{caseNo}
删除主表中重复的id,根据上面sql查出的数据
2.一部删除到位
DELETE from ${table} WHERE bd_id IN (
SELECT bd_id from (
SELECT a.bd_id from ${table} a STRAIGHT_JOIN yz_bank_flow_all_add b on a.account_no = b.account_no
AND a.card_no = b.card_no
AND a.trade_account_no = b.trade_account_no
AND a.money = b.money
AND a.trade_date = b.trade_date
AND a.trade_type = b.trade_type
AND b.import_no=#{importNo} and b.case_no =#{caseNo}
) as dd
)
##两张表对于同一条数据不存在多条记录删除
DELETE from ${table where ${table.bd_id in (
SELECT dd.id from(
select count(1) as cc,min(id)-1 as id from (
SELECT bd_id as id,account_no,trade_account_no,money,trade_date,card_no FROM yz_bank_flow_all_add
UNION ALL
SELECT bd_id-1 as id,account_no,trade_account_no,money,trade_date ,card_no FROM yz_bank_flow_YXSF01 where import_no=#{importNo} and case_no =#{caseNo}
) as c GROUP BY card_no,account_no,trade_account_no,money,trade_date having cc >1
) as dd