费尽周折写的一个mysql多字段滤重sql

需求是两个表的合并 需要从表user_reply和reply_mention合并到表user_reply_mention,但是合并之后rid和uid有重复数据,需要滤重,不想写代码,写了个sql脚本,通过中间表过滤了。
CREATE TABLE `user_reply_mention` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT COMMENT '自增ID',
  `rid` int(11) DEFAULT NULL COMMENT '帖子回复ID',
  `uid` int(11) DEFAULT NULL COMMENT '用户ID',
  `type` int(2) DEFAULT NULL COMMENT '数据类型(0 新加数据 1 用户评论 2 评论提到我',
  `create_time` datetime DEFAULT NULL COMMENT '创建时间',
  PRIMARY KEY (`id`),
  UNIQUE KEY `unique_rid_uid` (`rid`,`uid`)
) ENGINE=InnoDB AUTO_INCREMENT=3000000 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT COMMENT='用户回复提到表'; 



CREATE TABLE `user_reply_mention2` (
  `id` int(11) unsigned NULL AUTO_INCREMENT COMMENT '自增ID',
  `rid` int(11) DEFAULT NULL COMMENT '帖子回复ID',
  `uid` int(11) DEFAULT NULL COMMENT '用户ID',
  `type` int(2) DEFAULT NULL COMMENT '数据类型(0 新加数据 1 用户评论 2 评论提到我',
  `create_time` datetime DEFAULT NULL COMMENT '创建时间',
  `rid_uid` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  PRIMARY KEY (`id`),
  KEY `index_rid_uid` (`rid_uid`(191))
) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT COMMENT='用户回复提到表2';

CREATE TABLE `user_reply_mention3` (
  `id` int(11) DEFAULT  NULL COMMENT '自增ID',
  `rid` int(11) DEFAULT NULL COMMENT '帖子回复ID',
  `uid` int(11) DEFAULT NULL COMMENT '用户ID',
  `type` int(2) DEFAULT NULL COMMENT '数据类型(0 新加数据 1 用户评论 2 评论提到我',
  `create_time` datetime DEFAULT NULL COMMENT '创建时间',
  `rid_uid` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  KEY `rid_uid` (`rid_uid`(191))
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT COMMENT='用户回复提到表3';


CREATE TABLE `user_reply_mention4` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT COMMENT '自增ID',
  `rid` int(11) DEFAULT NULL COMMENT '帖子回复ID',
  `uid` int(11) DEFAULT NULL COMMENT '用户ID',
  `type` int(2) DEFAULT NULL COMMENT '数据类型(0 新加数据 1 用户评论 2 评论提到我',
  `create_time` datetime DEFAULT NULL COMMENT '创建时间',
  `rid_uid` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  PRIMARY KEY (`id`),
  KEY `rid_uid` (`rid_uid`(191))
) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT COMMENT='用户回复提到表3';


#导入数据
alter table user_reply_mention2 auto_increment=0; 
insert into user_reply_mention2(id,uid,rid,create_time,type) select id,uid,rid,createtime,1 as type from user_reply; 

alter table user_reply_mention2 auto_increment=2000000;
insert into user_reply_mention2(uid,rid,create_time,type) select uid,rid,create_time,2 as type from reply_mention;
alter table user_reply_mention2 auto_increment=3000000;


update user_reply_mention2 set rid_uid = concat(rid,'-',uid);

#开始滤重
insert into user_reply_mention3(rid_uid) select A.rid_uid from  ((select count(id) co, rid_uid  from user_reply_mention2 group by rid_uid having co > 1) A); //133439

update user_reply_mention3 B,user_reply_mention2 A set B.id = A.id where B.rid_uid = A.rid_uid;
#总重复数
select sum(A.co) from ((select count(1) as  co, rid_uid  from user_reply_mention2 group by rid_uid having co > 1)A);//266893
select count(B.id) from user_reply_mention3 B,user_reply_mention2 A  where B.rid_uid = A.rid_uid and A.id != B.id; // 133454(删除数据)
#user_reply_mention2 备份到4
insert into user_reply_mention4(id,uid,rid,create_time,type,rid_uid) select id,uid,rid,create_time,type,rid_uid from user_reply_mention2; 
#删除2中重复数据

select count(*) from user_reply_mention4   where id in (select A.id from user_reply_mention2 A,user_reply_mention3 B where A.rid_uid = B.rid_uid and A.id != B.id); //133454(删除数据)

delete from user_reply_mention4   where id in (select A.id from user_reply_mention2 A,user_reply_mention3 B where A.rid_uid = B.rid_uid and A.id != B.id); //133454(删除数据)

#检查是否有重复数据
select count(id) co, rid_uid  from user_reply_mention4 group by rid_uid having co > 1; //0条
#从4插入
从2插入 4插入
insert into user_reply_mention(id,uid,rid,create_time,type) select id,uid,rid,create_time,type from user_reply_mention2; //1661812
#删除
delete from user_reply_mention where id < 3000000;
#验证从4插入
insert into user_reply_mention(id,uid,rid,create_time,type) select id,uid,rid,create_time,type from user_reply_mention4; //1528358

#插入

#上线之后数据导入

select id,uid,rid,createtime,1 as type from user_reply where id > 1776557;

select max(id) from user_reply_mention2;1776557 最大id(type为1)
2147792(type为2)
insert into user_reply_mention(id,uid,rid,create_time,type) select id,uid,rid,createtime,1 as type from user_reply where id > 1776557; 
1790896
1924350(总数)


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值