mysql 去重主表数据,两张表结构相同数据(百万级别)

mysql 去重主表数据,两张表结构相同数据(百万级别)

每张表里大概130w数据,测试全部重复的情况下,去重
bank_flow 表
bank_flow_cope 表
表结构

CREATE TABLE yz_bank_flow_YXSF01 (
bd_id int(20) NOT NULL AUTO_INCREMENT COMMENT ‘ID’,
case_no varchar(10) DEFAULT NULL COMMENT ‘案件编号’,
u_id varchar(64) DEFAULT NULL COMMENT ‘对应的唯一ID’,
cfr_id varchar(10) DEFAULT NULL COMMENT ‘案件关系对应表ID’,
id_card_no varchar(32) DEFAULT NULL COMMENT ‘证件号’,
card_no varchar(32) DEFAULT ‘’ COMMENT ‘卡号’,
account_no varchar(32) DEFAULT ‘’ COMMENT ‘账号’,
account_name varchar(100) DEFAULT ‘’ COMMENT ‘账户名’,
trade_account_no varchar(50) DEFAULT ‘’ COMMENT ‘交易账户’,
trade_account_name varchar(150) DEFAULT ‘’ COMMENT ‘交易账户名’,
money decimal(20,2) DEFAULT NULL COMMENT ‘金额’,
balance decimal(20,2) DEFAULT NULL COMMENT ‘余额’,
trade_date datetime DEFAULT NULL COMMENT ‘交易日期’,
digest text COMMENT ‘摘要’,
remark text COMMENT ‘备注’,
login_ip varchar(50) DEFAULT NULL COMMENT ‘登录ID’,
login_mac varchar(50) DEFAULT NULL COMMENT ‘登录物理地址’,
flow_id varchar(100) DEFAULT NULL COMMENT ‘交易流水号’,
citation_no varchar(32) DEFAULT NULL COMMENT ‘传票号’,
log_no varchar(32) DEFAULT NULL COMMENT ‘日志号’,
voucher_no varchar(32) DEFAULT NULL COMMENT ‘凭证号’,
teller_no varchar(32) DEFAULT NULL COMMENT ‘交易柜员号’,
trade_type varchar(3) DEFAULT NULL COMMENT ‘交易类型(in:收入 out:支出)’,
id int(8) DEFAULT NULL COMMENT ‘id’,
trade_account_belong varchar(200) DEFAULT NULL COMMENT ‘交易账号银行归属’,
card_type varchar(4) DEFAULT NULL COMMENT ‘账号类型’,
open_bank varchar(50) DEFAULT NULL COMMENT ‘开户网点’,
classify varchar(2) DEFAULT NULL COMMENT ‘归类’,
trade_bank_name varchar(50) DEFAULT NULL COMMENT ‘交易行名称’,
import_no varchar(32) DEFAULT NULL COMMENT ‘导入批次号’,
import_time datetime DEFAULT NULL COMMENT ‘导入时间’,
cash_tag varchar(10) DEFAULT NULL COMMENT ‘现金标志’,
PRIMARY KEY (bd_id),
KEY bank_flow_YXSF01_card_no (card_no) USING BTREE,
KEY bank_flow_YXSF01_id_card_no (id_card_no) USING BTREE,
KEY bank_flow_YXSF01_trade_account_no (trade_account_no) USING BTREE,
KEY bank_flow_YXSF01_login_ip (login_ip) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=17200239 DEFAULT CHARSET=utf8mb4 COMMENT=‘银行流水表’;

CREATE TABLE yz_bank_flow_all_add (
bd_id bigint(20) NOT NULL AUTO_INCREMENT COMMENT ‘ID’,
case_no varchar(10) DEFAULT NULL COMMENT ‘案件编号’,
u_id varchar(64) DEFAULT NULL COMMENT ‘对应的唯一ID’,
cfr_id varchar(10) DEFAULT NULL COMMENT ‘案件关系对应表ID’,
id_card_no varchar(32) DEFAULT NULL COMMENT ‘证件号’,
card_no varchar(32) DEFAULT NULL COMMENT ‘卡号’,
account_no varchar(32) DEFAULT NULL COMMENT ‘账号’,
account_name varchar(100) DEFAULT NULL COMMENT ‘账户名’,
trade_account_no varchar(50) DEFAULT NULL COMMENT ‘交易账户’,
trade_account_name varchar(150) DEFAULT NULL COMMENT ‘交易账户名’,
money decimal(20,2) DEFAULT NULL COMMENT ‘金额’,
balance decimal(20,2) DEFAULT NULL COMMENT ‘余额’,
trade_date datetime DEFAULT NULL COMMENT ‘交易日期’,
digest longtext COMMENT ‘摘要’,
remark longtext COMMENT ‘备注’,
login_ip varchar(50) DEFAULT NULL COMMENT ‘登录ID’,
login_mac varchar(50) DEFAULT NULL COMMENT ‘登录物理地址’,
flow_id varchar(100) DEFAULT NULL COMMENT ‘交易流水号’,
citation_no varchar(32) DEFAULT NULL COMMENT ‘传票号’,
log_no varchar(32) DEFAULT NULL COMMENT ‘日志号’,
voucher_no varchar(32) DEFAULT NULL COMMENT ‘凭证号’,
teller_no varchar(32) DEFAULT NULL COMMENT ‘交易柜员号’,
trade_type varchar(3) DEFAULT NULL COMMENT ‘交易类型(in:收入 out:支出)’,
id bigint(20) DEFAULT NULL COMMENT ‘id’,
trade_account_belong varchar(200) DEFAULT NULL COMMENT ‘交易账号银行归属’,
card_type varchar(4) DEFAULT NULL COMMENT ‘账号类型’,
open_bank varchar(50) DEFAULT NULL COMMENT ‘开户网点’,
classify varchar(2) DEFAULT NULL COMMENT ‘归类’,
trade_bank_name varchar(50) DEFAULT NULL COMMENT ‘交易行名称’,
import_no varchar(32) DEFAULT NULL COMMENT ‘导入批次号’,
import_time datetime DEFAULT NULL COMMENT ‘导入时间’,
cash_tag varchar(10) DEFAULT NULL COMMENT ‘现金标志’,
PRIMARY KEY (bd_id) USING BTREE,
KEY bank_flow_card_no (card_no),
KEY bank_flow_id_card_no (id_card_no),
KEY bank_login_ip (login_ip),
KEY case_no (case_no) USING BTREE,
KEY trade_account_no (trade_account_no),
KEY account_no (account_no),
KEY trade_date (trade_date),
KEY money (money)
) ENGINE=InnoDB AUTO_INCREMENT=21657855 DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC COMMENT=‘银行流水表’;

得到两张表的并集,查出主表在并集中出现的主键id

SELECT a.bd_id from yz_bank_flow_YXSF01 a STRAIGHT_JOIN yz_bank_flow_all_add b on a.account_no = b.account_no
AND a.card_no = b.card_no
AND a.trade_account_no = b.trade_account_no
AND a.money = b.money
AND a.trade_date = b.trade_date
AND a.trade_type = b.trade_type

//多重复去重
SELECT a.bd_id from yz_bank_flow_YXSF01 a STRAIGHT_JOIN
(SELECT * from yz_bank_flow_all_add where yz_bank_flow_all_add.bd_id in
(select max(bd_id) as id
from yz_bank_flow_all_add GROUP BY account_no,trade_account_no,money,trade_date,card_no) ) b on a.account_no = b.account_no
And a.card_no = b.card_no
AND a.trade_account_no = b.trade_account_no
AND a.money = b.money
AND a.trade_date = b.trade_date
AND a.trade_type = b.trade_type
AND b.import_no=#{importNo} and b.case_no =#{caseNo}

删除主表中重复的id,根据上面sql查出的数据

2.一部删除到位

DELETE from ${table} WHERE bd_id IN (
SELECT bd_id from (
SELECT a.bd_id from ${table} a STRAIGHT_JOIN yz_bank_flow_all_add b on a.account_no = b.account_no
AND a.card_no = b.card_no
AND a.trade_account_no = b.trade_account_no
AND a.money = b.money
AND a.trade_date = b.trade_date
AND a.trade_type = b.trade_type
AND b.import_no=#{importNo} and b.case_no =#{caseNo}
) as dd
)

##两张表对于同一条数据不存在多条记录删除

DELETE from ${table where ${table.bd_id in (
SELECT dd.id from(
select count(1) as cc,min(id)-1 as id from (
SELECT bd_id as id,account_no,trade_account_no,money,trade_date,card_no FROM yz_bank_flow_all_add
UNION ALL
SELECT bd_id
-1 as id,account_no,trade_account_no,money,trade_date ,card_no FROM yz_bank_flow_YXSF01 where import_no=#{importNo} and case_no =#{caseNo}
) as c GROUP BY card_no,account_no,trade_account_no,money,trade_date having cc >1
) as dd


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值