如何使用大数据 SQL 语句

如何使用大数据 SQL 语句

标签: SQL, 大数据
从一个庞大的数据库中众多的表格和视图中 query 出所需的数据,是一件熟能生巧的工作。

SELECT itemid, itembrand, itemunitprice,
  RANK() OVER ( ORDER BY itemunitprice ) AS rankunitprice
  FROM ssitem;

RANK() OVER

SELECT custname, SUM(salesdollar) AS sumsales,
  RANK() OVER ( ORDER BY SUM(salesdollar) DESC ) salesrank
  FROM sssales, sscustomer
  WHERE sssales.custid = sscustomer.custid
  GROUP BY custname;

RANK() OVER

SELECT custstate, custname, sum(salesdollar) AS sumsales,
  RANK() OVER ( PARTITION BY custstate
                ORDER BY SUM(salesdollar) DESC ) salesrank
  FROM sssales, sscustomer
  WHERE sssales.custid = sscustomer.custid
  GROUP BY custstate, custname
  ORDER BY custstate;

PARTITION BY

SELECT storezip, timeyear, SUM(salesdollar) AS sumsales,
  SUM(SUM(salesdollar)) OVER
    (ORDER BY storezip, timeyear
     ROWS UNBOUNDED PRECEDING) AS cumsumsales
  FROM ssstore, sstimedim, sssales
  WHERE sssales.storeid = ssstore.storeid
  AND sssales.timeno = sstimedim.timeno
  GROUP BY storezip, timeyear;

SUM() OVER

SELECT storezip, timeyear, SUM(salesdollar) AS sumsales,
  SUM(SUM(salesdollar)) OVER
    (PARTITION BY storezip
     ORDER BY storezip, timeyear
     ROWS UNBOUNDED PRECEDING) AS cumsumsales
  FROM ssstore, sstimedim, sssales
  WHERE sssales.storeid = ssstore.storeid
  AND sssales.timeno = sstimedim.timeno
  GROUP BY storezip, timeyear;

PARTITION BY

SELECT storezip, timeyear, SUM(salesdollar) AS sumsales,
  ROUND(AVG(SUM(salesdollar)) OVER
       (ORDER BY storezip, timeyear
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING), 2) AS centermovavgsumsales
  FROM ssstore, sstimedim, sssales
  WHERE sssales.storeid = ssstore.storeid
  AND sssales.timeno = sstimedim.timeno
  GROUP BY storezip, timeyear;

ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING

SELECT timeyear, custcity, SUM(salesdollar) AS sumsales,
  ROUND(RATIO_TO_REPORT(SUM(salesdollar))
  OVER (PARTITION BY timeyear), 2) AS sumsalesratio
  FROM sscustomer, sssales, sstimedim
  WHERE sssales.custid = sscustomer.custid
  AND sssales.timeno = sstimedim.timeno
  GROUP BY timeyear, custcity
  ORDER BY timeyear, SUM(salesdollar) DESC;

RATIO_TO_REPORT

SELECT itemname, itembrand, itemunitprice, cumdistunitprice
  FROM (SELECT itemid, itemname, itembrand, itemunitprice, 
    CUME_DIST() OVER (ORDER BY itemunitprice DESC) AS cumdistunitprice
    FROM ssitem)
  WHERE cumdistunitprice <= 0.8;

CUME_DIST() OVER

SELECT custname, SUM(salesunits) AS sumsalesunits,
  RANK() OVER (ORDER BY SUM(salesunits)) AS ranksalesunits,
  PERCENT_RANK() OVER (ORDER BY SUM(salesunits)) AS perranksalesunits,
  ROW_NUMBER() OVER (ORDER BY SUM(salesunits)) AS rownumsalesunits,
  ROUND(CUME_DIST() OVER  (ORDER BY SUM(salesunits)), 2) AS cumdistsalesunits
  FROM sssales, sscustomer
  WHERE sssales.custid = sscustomer.custid
  GROUP BY custname;

cumulative distribution function

SELECT itemname, itemunitprice,
  RANK() OVER (ORDER BY itemunitprice) AS rankunitprice,
  PERCENT_RANK() OVER (ORDER BY itemunitprice) AS perrankunitprice,
  ROW_NUMBER() OVER (ORDER BY itemunitprice) AS rownumunitprice,
  CUME_DIST() OVER (ORDER BY itemunitprice) AS cumdistunitprice,
  FROM ssitem;

cumulative distribution function

CREATE VIEW connex_sales_view AS 
  SELECT ssitem.itemid, itemname, itemcategory,itemunitprice, salesno, salesunits,
  salesdollar, salescost, timeyear, timemonth, timeday
  FROM ssitem, sssales, sstimedim
  WHERE itembrand = 'connex'
  AND timeyear BETWEEN 2010 AND 2012
  AND ssitem.itemid = sssales.itemid
  AND sstimedim.timeno = sssales.timeno;
SELECT itemname, itemcategory, itemunitprice, salesno, salesunits,
  salesdollar, salescost, timeyear, timemonth, timeday
  FROM connex_sales_view
  WHERE itemunitprice < 100 AND timeyear BETWEEN 2011 AND 2012;
CREATE MATERIALIZED VIEW MV1
  BUILD IMMEDIATE
  REFRESH COMPLETE ON DEMAND
  ENABLE QUERY REWRITE AS 
  SELECT storestate, timeyear, SUM(salesdollar) AS sumdollar
  FROM sssales, ssstore, sstimedim
  WHERE sssales.storeid = ssstore.storeid
  AND sssales.timeno = sstimedim.timeno
  AND timeyear > 2010
  GROUP BY storestate, timeyear;
  • 1
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值