一、建库和用户权限
- 建立数据库:create database db_test; 建立名叫db_test的数据库
- 权限:grant all privileges on db_test.* to 'root'@'%' Identified by '123456 with grant option
GRANT:赋权命令
ALL PRIVILEGES:当前用户的所有权限
ON:介词
*.*:当前用户对所有数据库和表的相应操作权限
TO:介词
‘root’@’%’:权限赋给root用户,所有ip都能连接
IDENTIFIED BY ‘123456’:连接时输入密码,密码为123456
WITH GRANT OPTION:允许级联赋权
——————————————————————
show grants for name;//查看用户权限
grant select on db_test.* to username;//赋予username用户db_test数据库所有权限
revoke select on db_test.* to username;//去除username用户db_test数据库所有权限
更改密码:set password for name=password('密码');
二、数据库操作
- create 创建: CREATE DATABASE db_name; //db_name为数据库名
- drop 删除: DROP DATABASE db_name;
- show\use: show database; 显示\使用
三、数据表操作
- 建表
create table table_name(
id int not null auto_increment, //整型 非空 递增
name varchar(255) not null, //无符号字符型 非空
score tinyint unsigned not null default 0, //非空设置默认值0
primary key(id) // 主键
)ENGINE=InnoDB //设置表的存储引擎,常用InnoDB\MylSAM;常用InnoDB
DEFAULT charset=utf8; /设置默认编码格式
-
- 复制表
create table tb_name2 select * from tb_name1; //从1表复制给2表
-
- 建立临时表
- create temporary table tb_name;
- 删除表
- drop table IF EXISTS tb_name;
- 建立临时表
- 重命名
rename table tb_oldname to tb_newname;
alter table tb_oldname to tb_newname;
四、数据字段操作
插入数据insert
- insert into tb_name(id,name,age) valuse(null,'张三',25),(null,'李四',20)……;//插入新建数据
- insert into tb_name(name,score) select name,score from tb_othername2;
- 更新数据 update
-
- update tb_name set name='王五',age=15 where id=1;//UPDATE tb_name SET columnName=newValue WHERE conditon;
- 删除数据 delete
- delete frome db_name where id=1;
- 查看数据 select
- select * from db_name where conditon;
- 条件过滤
- where语句:select * from db_name WHERE id=3 AND id=4;
- having语句: \
-
——————模版————————
select column_name ,aggregate_function(column_name)
from table_name
where column_name operator value
group by column_name
having aggregate_function(column) operator value
- aggregate_function合计函数
AVG(column) | 返回某列的平均值 |
COUNT(column) | 返回某列的行数(不包括 NULL 值) |
COUNT(*) | 返回被选行数 |
FIRST(column) | 返回在指定的域中第一个记录的值 |
LAST(column) | 返回在指定的域中最后一个记录的值 |
MAX(column) | 返回某列的最高值 |
MIN(column) | 返回某列的最低值 |
STDEV(column) | |
STDEVP(column) | |
SUM(column) | 返回某列的总和 |
UCASE(c) | 将某个域转换为大写 |
LCASE(c) | 将某个域转换为小写 |
MID(c,start[,end]) | 从某个文本域提取字符 |
LEN(c) | 返回某个文本域的长度 |
INSTR(c,char) | 返回在某个文本域中指定字符的数值位置 |
LEFT(c,number_of_char) | 返回某个被请求的文本域的左侧部分 |
RIGHT(c,number_of_char) | 返回某个被请求的文本域的右侧部分 |
ROUND(c,decimals) | 对某个数值域进行指定小数位数的四舍五入 |
MOD(x,y) | 返回除法操作的余数 |
NOW() | 返回当前的系统日期 |
FORMAT(c,format) | 改变某个域的显示方式 |
DATEDIFF(d,date1,date2) | 用于执行日期计算 |
-
- 相关控制符
- =; < ;> ; in( 1,2); between a and b;not ;or; null;and;
- 相关控制符
- like用法 :select * from tb_name where user like '%d'; //%匹配任意一段字符,_匹配任意一个字符
- GROUP BY 分组
-
- 规范:
-
SELECT o.custid, c.name, MAX(o.payment) FROM orders AS o, customers AS c WHERE o.custid = c.custid GROUP BY o.custid;
对于group by 分组有依赖问题,当两表联立查询时,group by依赖必须有 两表字符
- 一表的数据可以用聚合函数
- 二表是一一对应的必须在group by 添加其字符
-
-
- eg: elect
-
-
a.deptno,
d.dname
FROM emp a,dept d
where a.deptno=d.deptno
GROUP BY a.deptno
> 1055 - Expression #2 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.d.dname' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by
> 时间: 0s//解决方法:在GROUP BY a.deptno,d.dname
<2>案列:数据库开窗函数
案列1
我们有如下的用户访问数据
userId visitDate visitCount
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
U02 2017/1/23 6
U01 2017/2/22 4
要求使用SQL统计出每个用户每月的累积访问次数,如下表所示:
解: select
a.*,
SUM(count) over(PARTITION by userId ORDER BY mm) total_cnt
from (select
userId,
DATE_FORMAT(REPLACE(visitDate,'/','-'),'%Y-%m') mm,
SUM(visitCount) count
FROM
test1
GROUP BY userId,mm
)a
用户id 月份 小计 累积
u01 2017-01 11 11
u01 2017-02 12 23
u02 2017-01 12 12
u03 2017-01 8 8
u04 2017-01 3 3
CREATE TABLE bigdata.test1 (
userId varchar(14),
visitDate varchar(14),
visitCount INT )
;
INSERT INTO TABLE bigdata.test1
VALUES
( 'u01', '2017/1/21', 5 ),
( 'u02', '2017/1/23', 6 ),
( 'u03', '2017/1/22', 8 ),
( 'u04', '2017/1/20', 3 ),
( 'u01', '2017/1/23', 6 ),
( 'u01', '2017/2/21', 8 ),
( 'u02', '2017/1/23', 6 ),
( 'u01', '2017/2/22', 4 );
案例2
有50W个京东店铺,每个顾客访客访问任何一个店铺的任何一个商品时都会产生一条访问日志,
访问日志存储的表名为Visit,访客的用户id为user_id,被访问的店铺名称为shop,数据如下:
u1 a
u2 b
u1 b
u1 a
u3 c
u4 b
u1 a
u2 c / u5 b /u4 b /u6 c /u2 c /u1 b /u2 a /u2 a /u3 a /u5 a /u5 a /u5 a
请统计:
(1)每个店铺的UV(访客数)
解: select
shop,
COUNT(user_id)
FROM test2
GROUP BY shop
(2)每个店铺访问次数top3的访客信息。
输出店铺名称、访客id、访问次数
解: select
*
FROM(
select
shop,user_id,COUNT(user_id) as cnt_shop,
ROW_NUMBER() over(PARTITION by shop order by COUNT(user_id) desc) cnt_rank
FROM test2
GROUP BY shop ,user_id
)a
WHERE cnt_rank<4
建表:CREATE TABLE bigdata.test2 (
user_id varchar(14),
shop varchar(14) );
INSERT INTO bigdata.test2 VALUES
( 'u1', 'a' ),
( 'u2', 'b' ),
( 'u1', 'b' ),
( 'u1', 'a' ),
( 'u3', 'c' ),
( 'u4', 'b' ),
( 'u1', 'a' ),
( 'u2', 'c' ),
( 'u5', 'b' ),
( 'u4', 'b' ),
( 'u6', 'c' ),
( 'u2', 'c' ),
( 'u1', 'b' ),
( 'u2', 'a' ),
( 'u2', 'a' ),
( 'u3', 'a' ),
( 'u5', 'a' ),
( 'u5', 'a' ),
( 'u5', 'a' );
案例3
已知一个表STG.ORDER,有如下字段:
Date,Order_id,User_id,amount。
数据样例:2017-01-01,10029028,1000003251,33.57。
请给出sql进行统计:
- 给出 2017年每个月的订单数、用户数、总成交金额。
- 给出2017年11月的新客数(指在11月才有第一笔订单)
CREATE TABLE bigdata.test3 (
dt varchar(14),
order_id varchar(14),
user_id varchar(14),
amount DECIMAL ( 10, 2 ) );
INSERT INTO bigdata.test3 VALUES ('2017-01-01','10029028','1000003251',33.57);
INSERT INTO bigdata.test3 VALUES ('2017-01-01','10029029','1000003251',33.57);
INSERT INTO bigdata.test3 VALUES ('2017-01-01','100290288','1000003252',33.57);
INSERT INTO bigdata.test3 VALUES ('2017-02-02','10029088','1000003251',33.57);
INSERT INTO bigdata.test3 VALUES ('2017-02-02','100290281','1000003251',33.57);
INSERT INTO bigdata.test3 VALUES ('2017-02-02','100290282','1000003253',33.57);
INSERT INTO bigdata.test3 VALUES ('2017-11-02','10290282','100003253',234);
INSERT INTO bigdata.test3 VALUES ('2017-11-01','1002902912','1000003251',33.57);
INSERT INTO bigdata.test3 VALUES ('2017-11-01','10029028812','1000003252',33.57);
INSERT INTO bigdata.test3 VALUES ('2017-11-02','1002908812','1000003251',33.57);
INSERT INTO bigdata.test3 VALUES ('2017-11-02','100290281','1000003251',33.57);
INSERT INTO bigdata.test3 VALUES ('2018-11-02','10290284','100003243',234);
案例4
CREATE TABLE `employees` (
`eid` char(20) NOT NULL COMMENT '员工编号',
`ename` char(10) NOT NULL COMMENT '姓名',
`edu` char(4) NOT NULL COMMENT '学历',
`birthday` date NOT NULL COMMENT '出生日期',
`sex` char(2) NOT NULL COMMENT '性别',
`status` tinyint(1) DEFAULT NULL COMMENT '是否在职,0在职,1离职',
`addr` varchar(20) DEFAULT NULL COMMENT '地址',
`tel` char(12) DEFAULT NULL COMMENT '电话号码',
`dept_id` char(3) DEFAULT NULL COMMENT '员工部门号',
create_time timestamp not null default current_timestamp,
update_time timestamp not null default current_timestamp on update current_timestamp,
PRIMARY KEY (`eid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO `employees` VALUES ('000001', '王林', '大专', '1966-01-23', '1', 0, '中山路32-1-508', '83355668', '2','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('010008', '伍荣华', '本科', '1976-03-28', '1', 0, '北京东路100-2', '83321321', '1','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('020010', '王向容', '硕士', '1982-12-09', '1', 0, '四牌楼10-0-108', '83792361', '1','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('020018', '李丽', '大专', '1960-07-30', '0', 1, '中山东路102-2', '83413301', '1','2018-08-24 23:04:10','2021-01-24 23:04:10');
INSERT INTO `employees` VALUES ('102201', '刘明', '本科', '1972-10-18', '1', 0, '虎踞路100-2', '83606608', '5','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('102208', '朱俊', '硕士', '1965-09-28', '1', 1, '牌楼巷5-3-106', '84708817', '5','2018-08-24 23:04:10','2021-01-24 23:04:10');
INSERT INTO `employees` VALUES ('108991', '钟敏', '硕士', '1979-08-10', '0', 0, '中山路10-3-105', '83346722', '3','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('111006', '张石兵', '本科', '1974-10-01', '1', 1, '解放路34-1-203', '84563418', '5','2018-08-24 23:04:10','2021-02-24 23:04:10');
INSERT INTO `employees` VALUES ('210678', '林涛', '大专', '1977-04-02', '1', 0, '中山北路24-35', '83467336', '3','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('302566', '李玉民', '本科', '1968-09-20', '1', 1, '热河路209-3', '58765991', '4','2018-08-24 23:04:10','2021-02-24 23:04:10');
INSERT INTO `employees` VALUES ('308759', '叶凡', '本科', '1978-11-18', '1', 0, '北京西路3-7-52', '83308901', '4','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('504209', '陈琳琳', '大专', '1969-09-03', '0', 0, '汉中路120-4-12', '84468158', '4','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('0000011', '王一', '大专', '1966-01-23', '1', 0, '中山路32-1-508', '83355668', '2','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('0100081', '伍二', '本科', '1976-03-28', '1', 0, '北京东路100-2', '83321321', '1','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('0200101', '王三', '硕士', '1982-12-09', '1', 1, '四牌楼10-0-108', '83792361', '1','2018-08-24 23:04:10','2021-03-24 23:04:10');
INSERT INTO `employees` VALUES ('0200181', '李丽2', '大专', '1960-07-30', '0', 0, '中山东路102-2', '83413301', '1','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('1022011', '刘明2', '本科', '1972-10-18', '1', 0, '虎踞路100-2', '83606608', '5','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('1022081', '朱俊2', '硕士', '1965-09-28', '1', 1, '牌楼巷5-3-106', '84708817', '5','2018-08-24 23:04:10','2021-03-24 23:04:10');
INSERT INTO `employees` VALUES ('1089911', '钟敏2', '硕士', '1979-08-10', '0', 0, '中山路10-3-105', '83346722', '3','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('1110061', '张石兵2', '本科', '1974-10-01', '1', 1, '解放路34-1-203', '84563418', '5','2018-08-24 23:04:10','2021-04-24 23:04:10');
INSERT INTO `employees` VALUES ('2106781', '林涛2', '大专', '1977-04-02', '1', 0, '中山北路24-35', '83467336', '3','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('3025661', '李玉民2', '本科', '1968-09-20', '1', 1, '热河路209-3', '58765991', '4','2018-08-24 23:04:10','2021-04-24 23:04:10');
INSERT INTO `employees` VALUES ('3087591', '叶凡2', '本科', '1978-11-18', '1', 0, '北京西路3-7-52', '83308901', '4','2018-08-24 23:04:10','2018-08-24 23:04:10');
INSERT INTO `employees` VALUES ('5042091', '陈琳琳2', '大专', '1969-09-03', '0', 1, '汉中路120-4-12', '84468158', '4','2018-08-24 23:04:10','2021-08-24 23:04:10');
需求:
求2021年每个月的离职率
SELECT
a.*,
ROUND(left_cnt/onnow_cnt,2)*100 as rato
FROM
(
SELECT
a.*,
(on_cnt-left_sum) as on_cntt,
(left_cnt+on_cnt-left_sum) as onnow_cnt
FROM
(
SELECT
a.*,
b.*,
SUM(left_cnt) over(ORDER BY left_mm) left_sum
from
(SELECT
DATE_FORMAT(update_time,"%Y-%m") as left_mm,
COUNT(1)as left_cnt
FROM
employees
GROUP BY
DATE_FORMAT(update_time,"%Y-%m"))a,
(SELECT
COUNT(1) as on_cnt
FROM
employees)b
WHERE left_mm>"2018-08"
ORDER BY left_mm )a)a