-- 1. Deduplicate in place, directly on the source table
-- Remove exact duplicate rows from `data` in place, keeping one copy of each
-- distinct row.
--
-- Why this is not a single DELETE: the previous statement required a row's
-- full tuple to be in the duplicated set AND its user_id to be absent from the
-- user_ids of that very same set. Those two conditions contradict each other,
-- so it never deleted anything. No unique key is visible on `data` that could
-- tell one duplicate copy from another, so "delete all but one" cannot be
-- expressed as a row predicate. Instead, snapshot the distinct rows and reload
-- the table from that snapshot.
--
-- NOTE(review): assumes `data` consists of exactly the 11 columns listed
-- below (the original tuple comparison against t.* implied this) -- confirm.
-- NOTE(review): run while no other session is writing to `data`; rows added
-- between the snapshot and the DELETE would be lost.

-- Snapshot one copy of every distinct row. DISTINCT treats NULLs as equal, so
-- duplicates containing NULL columns are collapsed as well. A TEMPORARY table
-- is session-private, and creating/dropping it does not implicitly commit.
DROP TEMPORARY TABLE IF EXISTS data_dedup;

CREATE TEMPORARY TABLE data_dedup AS
SELECT DISTINCT
    event_time,
    order_id,
    product_id,
    category_id,
    category_code,
    brand,
    price,
    user_id,
    age,
    sex,
    `local`
FROM data;

-- Replace the table contents in one transaction so a failure part-way can be
-- rolled back (on a transactional engine). DELETE is used rather than
-- TRUNCATE because TRUNCATE causes an implicit commit.
START TRANSACTION;

-- Intentionally unfiltered: every row is reloaded from the snapshot below.
DELETE FROM data;

INSERT INTO data (
    event_time,
    order_id,
    product_id,
    category_id,
    category_code,
    brand,
    price,
    user_id,
    age,
    sex,
    `local`
)
SELECT
    event_time,
    order_id,
    product_id,
    category_id,
    category_code,
    brand,
    price,
    user_id,
    age,
    sex,
    `local`
FROM data_dedup;

COMMIT;

DROP TEMPORARY TABLE data_dedup;
-- 2. Create a new table using the DISTINCT method
-- Build data_clean as a de-duplicated copy of data: every distinct full row
-- of the source appears exactly once. The source table is only read here.
CREATE TABLE data_clean AS
SELECT DISTINCT
    src.*
FROM data AS src;