DINGJUN123>SELECT * FROM t 2 ORDER BY years,months,product_name; YEARS MONTHS PRODUCT_NAME SALES ---------- ---------- -------------------- ---------- 2008 1 A 1000 2008 1 B 1500 2008 2 A 2000 2008 2 B 3000 2008 2 C 1000 2008 3 A 3000 已选择6行。
--1.WITH子句将时间和product_name固定下来,然后在查询中生成笛卡尔积,这样就有每个产品的所有时间段数据,当然 --这里的WITH子句也可以直接生成笛卡尔积 --2.笛卡尔积和原始表t做外连接,从而实现数据稠密化过程,当然这里可以使用t RIGHT JOIN …这样就不需要写那么 --多+号了。 --这里的WITH子句从原始表中DISTINCT,如果原始表很大,效率不好,实际应用经常是从其他关联表获取或自己构造 WITH year_month AS (SELECT DISTINCT years,months FROM t), product AS (SELECT DISTINCT product_name FROM t) SELECT m.years,m.months,m.product_name,NVL(t.sales,0) sales FROM t, (SELECT years,months,product_name FROM year_month,product) m WHERE t.years(+) = m.years AND t.months(+) = m.months AND t.product_name(+) = m.product_name ORDER BY 1,2,3;
--按照上面说的改进的SQL,WITH直接生成笛卡尔积,然后使用SQL92新的外连接语法,省略了很多+号,更容易理解 WITH m AS (SELECT years,months,product_name FROM (SELECT DISTINCT years,months FROM t), (SELECT DISTINCT product_name FROM t) ) SELECT m.years,m.months,m.product_name,NVL(t.sales,0) sales FROM t RIGHT JOIN m ON t.years = m.years AND t.months = m.months AND t.product_name = m.product_name ORDER BY 1,2,3;
YEARS MONTHS PRODUCT_NAME SALES ---------- ---------- -------------------- ---------- 2008 1 A 1000 2008 1 B 1500 2008 1 C 0 2008 2 A 2000 2008 2 B 3000 2008 2 C 1000 2008 3 A 3000 2008 3 B 0 2008 3 C 0 已选择9行。
--这里的m是从原始表中获取的,实际应用中一般自己构造或从其他关联表获取,从原始表获取DISTINCT,可能效率不好。 SELECT m.years,m.months,t.product_name,NVL(t.sales,0) sales FROM t PARTITION BY (t.product_name) RIGHT JOIN (SELECT DISTINCT years,months FROM t) m ON t.years = m.years AND t.months = m.months ORDER BY 1,2,3;
SELECT m.years,m.months,t.product_name,NVL(t.sales,0) sales FROM (SELECT DISTINCT years,months FROM t) m LEFT JOIN t PARTITION BY (t.product_name) ON t.years = m.years AND t.months = m.months ORDER BY 1,2,3;
--只考虑2008年前3月,如需详细学习MODEL子句,请参考相关文档,MODEL很强大,可以实现很多复杂功能 SELECT years,months,product_name,NVL(sales,0) FROM t MODEL PARTITION BY (product_name) DIMENSION BY (years,months) MEASURES(sales) RULES (sales[2008, FOR months IN (1,2,3)] = sales[2008,CV()]) ORDER BY 1,2,3;
SELECT m.years,m.months,t.product_name,NVL(t.sales,0) sales FROM t RIGHT JOIN (SELECT DISTINCT years,months FROM t) m PARTITION BY (t.product_name) ON t.years = m.years AND t.months = m.months ORDER BY 1,2,3;
这个语句不会报错,但是结果:
YEARS MONTHS PRODUCT_NAME SALES ---------- ---------- -------------------- ---------- 2008 1 A 1000 2008 1 B 1500 2008 2 A 2000 2008 2 B 3000 2008 2 C 1000 2008 3 A 3000 已选择6行。
DINGJUN123>SELECT m.years,m.months,t.product_name,NVL(t.sales,0) sales 2 FROM t 3 LEFT JOIN 4 (SELECT DISTINCT years,months FROM t) m 5 PARTITION BY (t.product_name) 6 ON t.years = m.years 7 AND t.months = m.months 8 ORDER BY 1,2,3; PARTITION BY (t.product_name) * 第 5 行出现错误: ORA-00904: : 标识符无效
--使用分析函数对比产品当前月销售与上月销售情况,计算递增量,可以对决策支持提供很清晰的报表 SELECT years,months,product_name,sales, sales-NVL(LAG(sales) OVER(PARTITION BY product_name ORDER BY years,months) ,0) add_last_sales FROM ( SELECT m.years,m.months,t.product_name,NVL(t.sales,0) sales FROM (SELECT DISTINCT years,months FROM t) m LEFT JOIN t PARTITION BY (t.product_name) ON t.years = m.years AND t.months = m.months );
结果如下:
YEARS MONTHS PRODUCT_NAME SALES ADD_LAST_SALES ---------- ---------- -------------------- ---------- -------------- 2008 1 A 1000 1000 2008 2 A 2000 1000 2008 3 A 3000 1000 2008 1 B 1500 1500 2008 2 B 3000 1500 2008 3 B 0 -3000 2008 1 C 0 0 2008 2 C 1000 1000 2008 3 C 0 -1000 已选择9行。
DINGJUN123>SELECT * FROM t 2 ORDER BY years,months,product_name,regions; YEARS MONTHS PRODUCT_NAME REGIONS SALES ---------- ---------- -------------------- -------------------- ---------- 2007 2 C 北区 2000 2008 1 A 东区 1000 2008 1 A 西区 2500 2008 1 B 东区 1500 2008 2 A 南区 2000 2008 2 A 西区 500 2008 2 B 东区 3000 2008 2 C 北区 1000 2008 3 A 东区 3000 已选择9行。
现在按年、产品、区域汇总销售额,结果如下:
DINGJUN123>SELECT years,product_name,regions,SUM(sales) sum_sales 2 FROM t 3 GROUP BY years,product_name,regions; YEARS PRODUCT_NAME REGIONS SUM_SALES ---------- -------------------- -------------------- ---------------------------------------------------- 2007 C 北区 2000 2008 A 东区 4000 2008 A 南区 2000 2008 A 西区 3000 2008 B 东区 4500 2008 C 北区 1000 已选择6行。
--方法1:首先补全每个产品所有区域的汇总数据,然后按产品和区域分区与所有年份外连接,从而补全每个产品横跨年和区域的汇--总数据。 WITH v1 AS (SELECT years,product_name,regions,SUM(sales) sum_sales FROM t GROUP BY years,product_name,regions), v2 AS (SELECT DISTINCT regions FROM t), v3 AS (SELECT DISTINCT years FROM t) SELECT v3.years,v4.product_name,v4.regions,v4.sum_sales FROM (SELECT v1.years,v1.product_name,v2.regions,v1.sum_sales FROM v1 PARTITION BY (v1.product_name) RIGHT JOIN v2 ON v1.regions = v2.regions ) v4 PARTITION BY (product_name,regions) RIGHT JOIN v3 ON v3.years = v4.years ORDER BY 2,1,3;
--方法2:先补全每个产品所有年的汇总数据,然后按产品和年分区与区域外连接,从而实现年和区域的二维数据稠密--化 WITH v1 AS (SELECT years,product_name,regions,SUM(sales) sum_sales FROM t GROUP BY years,product_name,regions), v2 AS (SELECT DISTINCT regions FROM t), v3 AS (SELECT DISTINCT years FROM t) SELECT v4.years,v4.product_name,v2.regions,v4.sum_sales FROM (SELECT v3.years,v1.product_name,v1.regions,v1.sum_sales FROM v1 PARTITION BY (v1.product_name) RIGHT JOIN v3 ON v1.years = v3.years ) v4 PARTITION BY (product_name,years) RIGHT JOIN v2 ON v2.regions = v4.regions ORDER BY 2,1,3;
/**前面已经说了,这里WITH子句中很多DISTINCT数据都是从原始表来的,如果销售表t几百万行几千万行,这种DISTINCT必然影响效率,这个一般不符合实际情况,我只是做个例子而已,--在实际应用中一般要么自己构造,要么从其他配置表中获取,比如regions应该有独立的区域表,存储的数据不多,这样--可以提高效率,比如years,一般统计的年份都是固定的,可以自己构造或从其他表中获取,可能有个时间表。 **/ WITH v1 AS (SELECT years,product_name,regions,SUM(sales) sum_sales FROM t GROUP BY years,product_name,regions), --这里的f_str2list是自己写的函数,将逗号分割的字符串分割为多行数据 v2 AS (SELECT COLUMN_VALUE regions FROM TABLE(CAST (f_str2list('东区,南区,西区,北区') AS varchar2_tt)) ), --自己构造指定时间维度数据 v3 AS (SELECT 2007+LEVEL-1 years FROM DUAL CONNECT BY LEVEL<3 ) SELECT v3.years,v4.product_name,v4.regions,v4.sum_sales FROM (SELECT v1.years,v1.product_name,v2.regions,v1.sum_sales FROM v1 PARTITION BY (v1.product_name) RIGHT JOIN v2 ON v1.regions = v2.regions ) v4 PARTITION BY (product_name,regions) RIGHT JOIN v3 ON v3.years = v4.years ORDER BY 2,1,3;
YEARS PRODUCT_NAME REGIONS SUM_SALES --------- -------------------- -------------------- ---------- 2007 A 东区 2007 A 北区 2007 A 南区 2007 A 西区 2008 A 东区 4000 2008 A 北区 2008 A 南区 2000 2008 A 西区 3000 2007 B 东区 2007 B 北区 2007 B 南区 2007 B 西区 2008 B 东区 4500 2008 B 北区 2008 B 南区 2008 B 西区 2007 C 东区 2007 C 北区 2000 2007 C 南区 2007 C 西区 2008 C 东区 2008 C 北区 1000 2008 C 南区 2008 C 西区 已选择24行。
DINGJUN123>ALTER SESSION SET NLS_DATE_FORMAT='YYYY-MM-DD'; 会话已更改。 DINGJUN123>SELECT * FROM t 2 ORDER BY product_name,done_date; PRODUCT_NAME DONE_DATE SALES -------------------- ---------- ---------- A 2010-07-01 1000 A 2010-07-05 2000 B 2010-07-02 3000 B 2010-07-04 4000 已选择4行。
WITH v1 AS (SELECT DATE'2010-07-01'+LEVEL-1 done_date FROM DUAL CONNECT BY LEVEL<6) SELECT product_name,done_date,sales, LAST_VALUE(sales IGNORE NULLS) OVER(PARTITION BY product_name ORDER BY done_date) recent_sales FROM ( SELECT t.product_name,v1.done_date,t.sales FROM t PARTITION BY (t.product_name) RIGHT JOIN v1 ON t.done_date = v1.done_date );
显示结果如下:
PRODUCT_NAME DONE_DATE SALES RECENT_SALES -------------------- ---------- ---------- ------------ A 2010-07-01 1000 1000 A 2010-07-02 1000 A 2010-07-03 1000 A 2010-07-04 1000 A 2010-07-05 2000 2000 B 2010-07-01 B 2010-07-02 3000 3000 B 2010-07-03 3000 B 2010-07-04 4000 4000 B 2010-07-05 4000 已选择10行。