分组去重后,且大于1条的数据都是什么
SELECT phone_number,`name`,`result`,COUNT(1) FROM cl_two_element WHERE `result` = 01 GROUP BY `phone_number` HAVING COUNT(1) > 1
表结构
CREATE TABLE risk_score
(
id
bigint(20) NOT NULL AUTO_INCREMENT COMMENT ’ primary key ID/自增主键 ',
customer_type
varchar(50) COLLATE utf8_bin DEFAULT ‘’ COMMENT ‘customer_type/用户类型2:商户(E-wallet user),3:用户(Merchant)’,
customer_merchant_id
varchar(50) COLLATE utf8_bin DEFAULT ‘’ COMMENT ‘customer ID or merchant ID/用户id或者商户id’,
score
int(4) DEFAULT ‘0’ COMMENT ‘score/评分’,
aml_risk_level
varchar(10) COLLATE utf8_bin DEFAULT ‘’ COMMENT ‘aml risk level/等级’,
reason
varchar(255) COLLATE utf8_bin DEFAULT ‘’ COMMENT ‘reason/理由’,
if_new_customer
varchar(10) COLLATE utf8_bin DEFAULT ‘’ COMMENT ‘if new customer/是否新用户’,
last_score_date
bigint(20) DEFAULT ‘0’ COMMENT ‘last score date/得分时间’,
if_approved
varchar(20) COLLATE utf8_bin DEFAULT ‘’ COMMENT ‘if approved/是否通过’,
if_approved_by
varchar(50) COLLATE utf8_bin DEFAULT ‘’ COMMENT ‘if approved by/通过人’,
if_approved_time
bigint(20) DEFAULT ‘0’ COMMENT ‘if approved time/通过时间’,
created_time
bigint(20) DEFAULT ‘0’ COMMENT ‘create time/创建时间’,
modified_time
bigint(20) DEFAULT ‘0’ COMMENT ‘update time/更新时间’,
PRIMARY KEY (id
),
UNIQUE KEY uniq_type_id_date
(customer_type
,customer_merchant_id
,last_score_date
)
) ENGINE=InnoDB AUTO_INCREMENT=453 DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT=‘risk score table’
新增数据INSERT INTO risk_score
(id
, customer_type
, customer_merchant_id
, score
, aml_risk_level
, reason
, if_new_customer
, last_score_date
, if_approved
, if_approved_by
, if_approved_time
, created_time
, modified_time
) VALUES(‘1’,‘1’,‘1’,‘0’,’’,’’,’’,‘1’,’’,’’,‘0’,‘0’,‘0’);
INSERT INTO risk_score
(id
, customer_type
, customer_merchant_id
, score
, aml_risk_level
, reason
, if_new_customer
, last_score_date
, if_approved
, if_approved_by
, if_approved_time
, created_time
, modified_time
) VALUES(‘2’,‘1’,‘1’,‘0’,’’,’’,’’,‘1’,’’,’’,‘0’,‘0’,‘0’);
INSERT INTO risk_score
(id
, customer_type
, customer_merchant_id
, score
, aml_risk_level
, reason
, if_new_customer
, last_score_date
, if_approved
, if_approved_by
, if_approved_time
, created_time
, modified_time
) VALUES(‘3’,‘1’,‘1’,‘0’,’’,’’,’’,‘1’,’’,’’,‘0’,‘0’,‘0’);
INSERT INTO risk_score
(id
, customer_type
, customer_merchant_id
, score
, aml_risk_level
, reason
, if_new_customer
, last_score_date
, if_approved
, if_approved_by
, if_approved_time
, created_time
, modified_time
) VALUES(‘4’,‘2’,‘2’,‘0’,’’,’’,’’,‘2’,’’,’’,‘0’,‘0’,‘0’);
INSERT INTO risk_score
(id
, customer_type
, customer_merchant_id
, score
, aml_risk_level
, reason
, if_new_customer
, last_score_date
, if_approved
, if_approved_by
, if_approved_time
, created_time
, modified_time
) VALUES(‘5’,‘2’,‘2’,‘0’,’’,’’,’’,‘2’,’’,’’,‘0’,‘0’,‘0’);
INSERT INTO risk_score
(id
, customer_type
, customer_merchant_id
, score
, aml_risk_level
, reason
, if_new_customer
, last_score_date
, if_approved
, if_approved_by
, if_approved_time
, created_time
, modified_time
) VALUES(‘6’,‘2’,‘2’,‘0’,’’,’’,’’,‘2’,’’,’’,‘0’,‘0’,‘0’);
INSERT INTO risk_score
(id
, customer_type
, customer_merchant_id
, score
, aml_risk_level
, reason
, if_new_customer
, last_score_date
, if_approved
, if_approved_by
, if_approved_time
, created_time
, modified_time
) VALUES(‘7’,‘2’,‘2’,‘0’,’’,’’,’’,‘2’,’’,’’,‘0’,‘0’,‘0’);
INSERT INTO risk_score
(id
, customer_type
, customer_merchant_id
, score
, aml_risk_level
, reason
, if_new_customer
, last_score_date
, if_approved
, if_approved_by
, if_approved_time
, created_time
, modified_time
) VALUES(‘8’,‘3’,‘3’,‘0’,’’,’’,’’,‘3’,’’,’’,‘0’,‘0’,‘0’);
其中唯一索引是
customer_type,
customer_merchant_id,
last_score_date
将三个字段重复的数据只保留一条
2,3,5,6,7 应该删除
1 删除重复的数据sql 是通过 group by 分组得到重复的数据 但是这样只有1条
DELETE FROM risk_score
WHERE id IN (SELECT * FROM (SELECT id FROM risk_score
a GROUP BY a.customer_type
,a.customer_merchant_id
,a.last_score_date
HAVING COUNT(1) > 1) t);
这样只是查询出来1,4
但是只是重复的删除一条,这样执行多次 直到没有也是可以
2 通过 第一步查询出来的数据 inner join 关联 得到全部重复的数据
SELECT
t.id
FROM
risk_score t
INNER JOIN
(SELECT
*
FROM
(SELECT
*
FROM
risk_score
a
GROUP BY a.customer_type
,
a.customer_merchant_id
,
a.last_score_date
HAVING COUNT(1) > 1) b) c
ON t.customer_type = c.customer_type
AND t.customer_merchant_id = c.customer_merchant_id
AND t.last_score_date = c.last_score_date
这是全部重复的
3 既然得到全部重复的数据和重复单一的数据 通过 in not in 去重
这就是要删除全部id 的集合
SELECT
id
FROM
risk_score
WHERE id IN
(SELECT
t.id
FROM
risk_score t
INNER JOIN
(SELECT
*
FROM
(SELECT
*
FROM
risk_score
a
GROUP BY a.customer_type
,
a.customer_merchant_id
,
a.last_score_date
HAVING COUNT(1) > 1) b) c
ON t.customer_type = c.customer_type
AND t.customer_merchant_id = c.customer_merchant_id
AND t.last_score_date = c.last_score_date)
AND id NOT IN
(SELECT
id
FROM
risk_score
a
GROUP BY a.customer_type
,
a.customer_merchant_id
,
a.last_score_date
HAVING COUNT(1) > 1)
这是去重 1,4 剩下要删除的
4 删除 3中重复的id集合
DELETE
FROM
risk_score
WHERE id IN
(SELECT
*
FROM
(SELECT
id
FROM
risk_score
WHERE id IN
(SELECT
t.id
FROM
risk_score t
INNER JOIN
(SELECT
*
FROM
(SELECT
*
FROM
risk_score
a
GROUP BY a.customer_type
,
a.customer_merchant_id
,
a.last_score_date
HAVING COUNT(1) > 1) b) c
ON t.customer_type = c.customer_type
AND t.customer_merchant_id = c.customer_merchant_id
AND t.last_score_date = c.last_score_date)
AND id NOT IN
(SELECT
id
FROM
risk_score
a
GROUP BY a.customer_type
,
a.customer_merchant_id
,
a.last_score_date
HAVING COUNT(1) > 1)) tq);
到此 1个sql 全部删除完毕,唯一联合索引不再有任何重复的
sql 语句啰嗦 但是功能过程正确