①创建mydata数据库,创建表userbehavior
# 创建数据库
CREATE DATABASE IF NOT EXISTS mydata CHARSET utf8;
USE mydata;
# 创建表userbehavior
CREATE TABLE userbehavior (
user_id INT (10),
item_id INT (10),
category_id INT (10),
behavior_type VARCHAR (10),
timestamps VARCHAR (20)
) ;
②导入已经下载好的csv数据
SHOW VARIABLES LIKE '%local%';
SET GLOBAL local_infile=1;
LOAD DATA LOCAL INFILE 'E:/dataset/UserBehavior.csv'
INTO TABLE userbehavior
FIELDS TERMINATED BY ','
IGNORE 1 LINES;
#查看内容,检查表结构
SELECT * FROM userbehavior LIMIT 10;
SELECT COUNT(*) FROM userbehavior; -- 100150806
DESC user_behavior;
③本次只选取前100万行数据,为了选取方便,我们添加序列号ID
// 添加序列号ID
ALTER TABLE userbehavior ADD id INT(8) FIRST;
ALTER TABLE userbehavior MODIFY id INT(8) PRIMARY KEY AUTO_INCREMENT;
#删除表中id大于100万的数据
DELETE FROM userbehavior WHERE id>1000000;