5.聚合函数
(1)聚合函数
目的:汇总数据,最终只返回一个值
常用的聚合函数:MAX();MIN();AVG();SUM();COUNT()
【注:只运行非空值】
【注:若想运行非空值,则直接用*,不用列名,仅部分聚合函数可运行】
用法: SELECT 聚合函数(列名/表达式)
FROM 表名
要想返回唯一值 ,得用 DISTINCT 关键词
COUNT(DISTINCT 列名)
练习:分上下半年并求和汇总
USE sql_invoicing;
SELECT 'First half of 2019' AS date_range,
sum(invoice_total) AS total_sales,
sum(payment_total) AS total_payment,
sum(invoice_total)-sum(payment_total) AS what_we_expect
FROM invoices
WHERE invoice_date BETWEEN '2019-01-01' AND '2019-06-30'
#WHERE invoice_date<'2019-07-01'
UNION
SELECT 'Second half of 2019' AS date_range,
sum(invoice_total) AS total_sales,
sum(payment_total) AS total_payment,
sum(invoice_total)-sum(payment_total) AS what_we_expect
FROM invoices
WHERE invoice_date>='2019-07-01'
UNION
SELECT 'Total' AS date_range,
sum(invoice_total) AS total_sales,
sum(payment_total) AS total_payment,
sum(invoice_total)-sum(payment_total) AS what_we_expect
FROM invoices
(2) GROUP BY 子句
目的:对多列数据进行数据汇总(对多列数据使用聚合函数)
用法: SELECT 列1名,列2名,列3名,聚合函数(列名/表达式)
FROM 表名
[WHERE 条件]
GROUP BY 列1名,列2名,列3名
[ORDER BY 列名]
练习:按照日期和付款方式汇总
USE invoicing;
SELECT ps.date ,
pm.name AS payment_method,
sum(amount) AS total_payments
FROM payments ps
JOIN payment_methods pm ON ps.payment_method = pm.payment_method_id
GROUP BY ps.date,ps.payment_method
ORDER BY ps.date
(3)HAVING 子句
目的:在分组后筛选符合条件的数据
【where子句是用在分组前筛选符合条件的数据】
用法: SELECT 列1名,列2名,列3名,聚合函数1(列名/表达式),聚合函数2(列名/表达式)
FROM 表名
[WHERE 条件]
GROUP BY 列1名,列2名,列3名
HAVING 聚合函数1满足的条件 AND 聚合函数2满足的条件
[ORDER BY 列名]
【HAVING子句中提到的列一定要出现在SELECT里,WHERE子句中的列是任意的】
【当SELECT中有聚合函数且对数据进行分组,可以直接根据SELECT子句里的所有列来进行分组】
练习:得到坐标VA且花费超过100美元的顾客
USE store;
#先知道所有的顾客的总消费,然后筛选VA的顾客,最后筛选花费超过100的VA顾客
SELECT
customer_id,
c.state,
sum(oi.quantity*oi.unit_price) AS total_amount
FROM customers c
JOIN orders os USING (customer_id)
JOIN order_items oi USING(order_id)
WHERE state='VA'
GROUP BY customer_id
HAVING total_amount > 100
(4)ROLLUP运算符【只用于MySQL】
目的:对每一组汇总以及整体汇总
用法: SELECT 列1名,列2名,列3名,聚合函数(列名/表达式)
FROM 表名
[WHERE 条件]
GROUP BY 列1名,列2名,列3名 WITH ROLLUP
[ORDER BY 列名]
【注:只能用于汇总数值的列】
练习:汇总对应支付方式的支付金额
USE invoicing;
#汇总所有的支付方式
SELECT *
FROM payments ps
JOIN payment_methods pms
ON ps.payment_method=pms.payment_method_id;
#根据支付方式汇总支付金额
SELECT pms.name,
sum(amount) AS total
FROM payments ps
JOIN payment_methods pms
ON ps.payment_method=pms.payment_method_id
GROUP BY pms.name WITH ROLLUP
6.复杂查询
(1)嵌套子查询
连续:写一个查询来找到所有收入在平均线以上的雇员
USE hr;
#先写计算平均值的子查询
SELECT AVG(salary)
FROM employees;
#再嵌套到查询中
SELECT *
FROM employees
WHERE salary > (
SELECT AVG(salary)
FROM employees
)
(2).用IN运算符写子查询
练习:写一段query来找到没有发票的客户
USE invoicing;
#子查询找到所有有发票的客户[唯一值]
SELECT DISTINCT client_id
FROM invoices;
#嵌套找到不在这个表里的客户
SELECT *
FROM clients
WHERE client_id NOT IN (
SELECT DISTINCT client_id
FROM invoices
)
(3).子查询vs连接join
【两者可以替换,但可读性和运行时间很重要,择优】
练习:写一段query找到订购id=3的顾客,写明顾客id和名字
use store;
#连接
SELECT DISTINCT c.customer_id,c.first_name,c.last_name
FROM customers c
JOIN ORDERS os USING (customer_id)
JOIN order_items ois USING (order_id)
WHERE product_id=3
#子查询#找到购买id=3的顾客
SELECT customer_id
FROM orders o
JOIN order_items ois USING (order_id)
WHERE ois.product_id=3
#嵌套子查询
SELECT c.customer_id,c.first_name,c.last_name
FROM customers c
WHERE c.customer_id IN(
SELECT customer_id
FROM orders o
JOIN order_items ois USING (order_id)
WHERE ois.product_id=3
)
(4).ALL 关键字
>ALL(一列值)# 判断是不是都大于这一列值等价于直接用MAX来判断
(5).ANY关键字
=ANY(一列值)#判断是否有含在一列值里面的值等价于IN运算符
(6).相关子查询
子查询和外查询之间存在相关性
练习:获取高于客户平均值的发票
use invoicing;
#计算每个客户的发票平均值
SELECT client_id,AVG(invoice_total) AS avg_invoice
FROM invoices
GROUP BY client_id;
#查找高于平均值的发票
SELECT *
FROM invoices i
WHERE invoice_total > (
SELECT AVG(invoice_total) AS avg_invoice
FROM invoices
WHERE client_id=i.client_id
)
(7).EXISTS运算符
目的:可以提高运算效率
IN运算符后写的子查询会生成结果集再进行判断
EXISTS运算符并不会生成结果集而是直接判断
练习:找到没有被订购过的产品
use store;
#IN
SELECT *
FROM products
WHERE product_id NOT IN (
select product_id
FROM order_items
);
#EXISTS
SELECT *
FROM products p
WHERE NOT EXISTS(
SELECT product_id
FROM order_items
WHERE product_id = p.product_id
)
(8).SELECT子句中的子查询
use invoicing;
#我写的
SELECT invoice_id,
invoice_total,
(select AVG(INVOICE_TOTAL) FROM INVOICES) AS invoice_average,
invoice_total-(select AVG(INVOICE_TOTAL) FROM INVOICES) AS difference
FROM invoices;
#老师写的
SELECT invoice_id,
invoice_total,
(select AVG(INVOICE_TOTAL) FROM INVOICES) AS invoice_average,
invoice_total-(select invoice_average) AS difference
FROM invoices
练习:
use invoicing;
SELECT client_id,
name,
(SELECT SUM(invoice_total)
FROM invoices
WHERE client_id = c.client_id) AS total_sales,
(SELECT AVG(invoice_total) AS total_sales
FROM invoices) AS average,
(select total_sales) - (select average) AS difference
FROM clients c
(9).FROM子句中的子查询
需要给子查询命名
【注:很复杂,最好只用于简单的子查询中】
7.数值函数
(1)处理数值的函数
ROUND(数值,保留的小数位) #四舍五入
CEILING()#返回大于或等于这个数字的最小整数
FLOOR() #返回小于或等于这个数字的最大整数
ABS() #绝对值
RAND() #生成[0,1]之间的随机数
(2)处理字符串的函数
LENGTH() #返回字符串的长度
UPPER() #转为大写
LOWER() #转为小写
LTRIM()#删除左边多余的空格
RTRIM()#删除右边多余的空格
TRIM() #删除多余的空格
LEFT(字符串,需获取的长度)
RIGHT(字符串,需获取的长度) #返回字符串右侧的所需长度的字符串(截断)
SUBSTRING(字符串,起始位置,需获取的长度) #可以得到一个字符串中任何位置的字符
LOCATE(需搜索的字符,字符串) #返回第一个字符或者一串字符匹配位置
REPLACE(字符串,原来的字符,替换的字符)#替换字符
CONCAT(字符串1,字符串2)#串联两个字符串
(3).处理日期和时间的函数
NOW() #返回现在的日期和时间
CURDATE() #返回现在的日期
CURTIME() #返回现在的时间
YEAR() #返回年份
MONTH
DAY
HOUR/MINUTE/SECOND
DAYNAME #返回字符串格式的星期
EXTRACT(想获取的单位 FROM 关键字) #提取关键字中想获取的部分
练习:提取所在年份的订单
SELECT *
FROM orders
WHERE YEAR(order_date)=YEAR(NOW())
(4).格式化日期和时间
DATE_FORMAT(时间字符串,‘%m %d %y')
%y 得到两位的年份
%Y %得到四位数的年份
%m % 得到两位数的月份
%M 得到月份名称
%d 天
TIME_FORMAT(时间字符串,’%H:%i %p)
(5).计算日期和时间
DATE_ADD(日期字符串,INTERVAL x DAY/YEAR/MONTH)#
(x可正可负,负就是回到过去)
DATE_SUB()
DATEDIFF(日期1,日期2) #计算两个日期的间隔
TIME_TO_SEC(时间1)-TIME_TO_SEC(时间2)#计算两个时间的间隔
(6).IFNULL函数
目的:用其他内容替换null值
用法:IFNULL(列名,替换值)
(7).COALESCE函数
目的:返回一堆值中的第一个非空值
用法:COALESCE(列名1,列名2,替换值)
(8).IF函数
目的:测试单个表达式
用法:SELECT IF(表达式,第一个值,第二个值)
FROM
USE store;
SELECT *
FROM (
SELECT product_id,
name,
(SELECT COUNT(order_id)
FROM order_items
WHERE product_id=p.product_id
) AS orders,
IF((SELECT orders)>1,'Many times','Once') AS Frequency
FROM products P
) AS PRODUCT_FREQUENCY
WHERE orders >0
###老师写的
USE store;
SELECT product_id,
name,
COUNT(*) AS orders,
IF (COUNT(*)>1,'many times','once') AS frequency
FROM products
JOIN ORDER_ITEMS USING(PRODUCT_ID)
GROUP BY product_id,name
9.CASE函数
目的:测试多个表达式
用法:SELECT CASE
WHEN 表达式1 THEN第一个值
WHEN 表达式2 THEN第二个值
ELSE 第三个值
END AS 别名
FROM
练习:
USE store;
SELECT concat(first_name,' ' ,last_name),
points,
CASE
WHEN points > 3000 THEN '黄金'
WHEN points BETWEEN 2000 AND 3000 THEN '白银'
ELSE '青铜'
END AS category
FROM customers
ORDER BY points DESC