大数据技术之Hive SQL练习(中级)

  • 实验数据准备

注意实验时可能会出现Hive 查询视图时遇到中文乱码???问题解决方法

进入mysql,输入以下两条命令:

ALTER TABLE `TBLS` MODIFY COLUMN VIEW_EXPANDED_TEXT mediumtext CHARACTER SET utf8;

ALTER TABLE `TBLS` MODIFY COLUMN VIEW_ORIGINAL_TEXT mediumtext CHARACTER SET utf8;

 创建表并加载数据

--1.用户信息表

DROP TABLE IF EXISTS user_info;

CREATE table user_info(

    user_id string comment'用户id',

    gender string comment'性别',

    birthday string comment'生日'

)comment'用户信息表'

row format delimited fields terminated by '\t';

--数据装载

insert overwrite table user_info

values ('101', '男', '1990-01-01'),

       ('102', '女', '1991-02-01'),

       ('103', '女', '1992-03-01'),

       ('104', '男', '1993-04-01'),

       ('105', '女', '1994-05-01'),

       ('106', '男', '1995-06-01'),

       ('107', '女', '1996-07-01'),

       ('108', '男', '1997-08-01'),

       ('109', '女', '1998-09-01'),

       ('1010', '男', '1999-10-01');

--2. 商品信息表

DROP TABLE IF EXISTS sku_info;

CREATE TABLE sku_info(

    `sku_id`      string COMMENT '商品id',

    `name`        string COMMENT '商品名称',

    `category_id` string COMMENT '所属分类id',

    `from_date`   string COMMENT '上架日期',

    `price`       double COMMENT '商品单价'

) COMMENT '商品属性表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

hive>

insert overwrite table sku_info

values ('1', 'xiaomi 10', '1', '2020-01-01', 2000),

       ('2', '手机壳', '1', '2020-02-01', 10),

       ('3', 'apple 12', '1', '2020-03-01', 5000),

       ('4', 'xiaomi 13', '1', '2020-04-01', 6000),

       ('5', '破壁机', '2', '2020-01-01', 500),

       ('6', '洗碗机', '2', '2020-02-01', 2000),

       ('7', '热水壶', '2', '2020-03-01', 100),

       ('8', '微波炉', '2', '2020-04-01', 600),

       ('9', '自行车', '3', '2020-01-01', 1000),

       ('10', '帐篷', '3', '2020-02-01', 100),

       ('11', '烧烤架', '3', '2020-02-01', 50),

       ('12', '遮阳伞', '3', '2020-03-01', 20);

--3.商品分类信息表

DROP TABLE IF EXISTS category_info;

create table category_info(

    `category_id`   string comment '分类id',

    `category_name`  string comment '分类名称'

) COMMENT  '品类表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table category_info

values ('1','数码'),

       ('2','厨卫'),

       ('3','户外');

--4.订单信息表

DROP TABLE IF EXISTS order_info;

create table order_info(

    `order_id`     string COMMENT '订单id',

    `user_id`      string COMMENT '用户id',

    `create_date`  string COMMENT '下单日期',

    `total_amount` decimal(16, 2) COMMENT '订单总金额'

) COMMENT '订单表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table order_info

values ('1', '101', '2021-09-27', 29000.00),

       ('2', '101', '2021-09-28', 70500.00),

       ('3', '101', '2021-09-29', 43300.00),

       ('4', '101', '2021-09-30', 860.00),

       ('5', '102', '2021-10-01', 46180.00),

       ('6', '102', '2021-10-01', 50000.00),

       ('7', '102', '2021-10-01', 75500.00),

       ('8', '102', '2021-10-02', 6170.00),

       ('9', '103', '2021-10-02', 18580.00),

       ('10', '103', '2021-10-02', 28000.00),

       ('11', '103', '2021-10-02', 23400.00),

       ('12', '103', '2021-10-03', 5910.00),

       ('13', '104', '2021-10-03', 13000.00),

       ('14', '104', '2021-10-03', 69500.00),

       ('15', '104', '2021-10-03', 2000.00),

       ('16', '104', '2021-10-03', 5380.00),

       ('17', '105', '2021-10-04', 6210.00),

       ('18', '105', '2021-10-04', 68000.00),

       ('19', '105', '2021-10-04', 43100.00),

       ('20', '105', '2021-10-04', 2790.00),

       ('21', '106', '2021-10-04', 9390.00),

       ('22', '106', '2021-10-05', 58000.00),

       ('23', '106', '2021-10-05', 46600.00),

       ('24', '106', '2021-10-05', 5160.00),

       ('25', '107', '2021-10-05', 55350.00),

       ('26', '107', '2021-10-05', 14500.00),

       ('27', '107', '2021-10-06', 47400.00),

       ('28', '107', '2021-10-06', 6900.00),

       ('29', '108', '2021-10-06', 56570.00),

       ('30', '108', '2021-10-06', 44500.00),

       ('31', '108', '2021-10-07', 50800.00),

       ('32', '108', '2021-10-07', 3900.00),

       ('33', '109', '2021-10-07', 41480.00),

       ('34', '109', '2021-10-07', 88000.00),

       ('35', '109', '2020-10-08', 15000.00),

       ('36', '109', '2020-10-08', 9020.00),

       ('37', '1010', '2020-10-08', 9260.00),

       ('38', '1010', '2020-10-08', 12000.00),

       ('39', '1010', '2020-10-08', 23900.00),

       ('40', '1010', '2020-10-08', 6790.00);

--5.订单明细表

DROP TABLE IF EXISTS order_detail;

CREATE TABLE order_detail

(

    `order_detail_id` string COMMENT '订单明细id',

    `order_id`        string COMMENT '订单id',

    `sku_id`          string COMMENT '商品id',

    `create_date`     string COMMENT '下单日期',

    `price`           decimal(16, 2) COMMENT '下单时的商品单价',

    `sku_num`         int COMMENT '下单商品件数'

) COMMENT '订单明细表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

INSERT overwrite table order_detail

values ('1', '1', '1', '2021-09-27', 2000.00, 2),

       ('2', '1', '3', '2021-09-27', 5000.00, 5),

       ('3', '2', '4', '2021-09-28', 6000.00, 9),

       ('4', '2', '5', '2021-09-28', 500.00, 33),

       ('5', '3', '7', '2021-09-29', 100.00, 37),

       ('6', '3', '8', '2021-09-29', 600.00, 46),

       ('7', '3', '9', '2021-09-29', 1000.00, 12),

       ('8', '4', '12', '2021-09-30', 20.00, 43),

       ('9', '5', '1', '2021-10-01', 2000.00, 8),

       ('10', '5', '2', '2021-10-01', 10.00, 18),

       ('11', '5', '3', '2021-10-01', 5000.00, 6),

       ('12', '6', '4', '2021-10-01', 6000.00, 8),

       ('13', '6', '6', '2021-10-01', 2000.00, 1),

       ('14', '7', '7', '2021-10-01', 100.00, 17),

       ('15', '7', '8', '2021-10-01', 600.00, 48),

       ('16', '7', '9', '2021-10-01', 1000.00, 45),

       ('17', '8', '10', '2021-10-02', 100.00, 48),

       ('18', '8', '11', '2021-10-02', 50.00, 15),

       ('19', '8', '12', '2021-10-02', 20.00, 31),

       ('20', '9', '1', '2021-09-30', 2000.00, 9),

       ('21', '9', '2', '2021-10-02', 10.00, 5800),

       ('22', '10', '4', '2021-10-02', 6000.00, 1),

       ('23', '10', '5', '2021-10-02', 500.00, 24),

       ('24', '10', '6', '2021-10-02', 2000.00, 5),

       ('25', '11', '8', '2021-10-02', 600.00, 39),

       ('26', '12', '10', '2021-10-03', 100.00, 47),

       ('27', '12', '11', '2021-10-03', 50.00, 19),

       ('28', '12', '12', '2021-10-03', 20.00, 13000),

       ('29', '13', '1', '2021-10-03', 2000.00, 4),

       ('30', '13', '3', '2021-10-03', 5000.00, 1),

       ('31', '14', '4', '2021-10-03', 6000.00, 5),

       ('32', '14', '5', '2021-10-03', 500.00, 47),

       ('33', '14', '6', '2021-10-03', 2000.00, 8),

       ('34', '15', '7', '2021-10-03', 100.00, 20),

       ('35', '16', '10', '2021-10-03', 100.00, 22),

       ('36', '16', '11', '2021-10-03', 50.00, 42),

       ('37', '16', '12', '2021-10-03', 20.00, 7400),

       ('38', '17', '1', '2021-10-04', 2000.00, 3),

       ('39', '17', '2', '2021-10-04', 10.00, 21),

       ('40', '18', '4', '2021-10-04', 6000.00, 8),

       ('41', '18', '5', '2021-10-04', 500.00, 28),

       ('42', '18', '6', '2021-10-04', 2000.00, 3),

       ('43', '19', '7', '2021-10-04', 100.00, 55),

       ('44', '19', '8', '2021-10-04', 600.00, 11),

       ('45', '19', '9', '2021-10-04', 1000.00, 31),

       ('46', '20', '11', '2021-10-04', 50.00, 45),

       ('47', '20', '12', '2021-10-04', 20.00, 27),

       ('48', '21', '1', '2021-10-04', 2000.00, 2),

       ('49', '21', '2', '2021-10-04', 10.00, 39),

       ('50', '21', '3', '2021-10-04', 5000.00, 1),

       ('51', '22', '4', '2021-10-05', 6000.00, 8),

       ('52', '22', '5', '2021-10-05', 500.00, 20),

       ('53', '23', '7', '2021-10-05', 100.00, 58),

       ('54', '23', '8', '2021-10-05', 600.00, 18),

       ('55', '23', '9', '2021-10-05', 1000.00, 30),

       ('56', '24', '10', '2021-10-05', 100.00, 27),

       ('57', '24', '11', '2021-10-05', 50.00, 28),

       ('58', '24', '12', '2021-10-05', 20.00, 53),

       ('59', '25', '1', '2021-10-05', 2000.00, 5),

       ('60', '25', '2', '2021-10-05', 10.00, 35),

       ('61', '25', '3', '2021-10-05', 5000.00, 9),

       ('62', '26', '4', '2021-10-05', 6000.00, 1),

       ('63', '26', '5', '2021-10-05', 500.00, 13),

       ('64', '26', '6', '2021-10-05', 2000.00, 1),

       ('65', '27', '7', '2021-10-06', 100.00, 30),

       ('66', '27', '8', '2021-10-06', 600.00, 19),

       ('67', '27', '9', '2021-10-06', 1000.00, 33),

       ('68', '28', '10', '2021-10-06', 100.00, 37),

       ('69', '28', '11', '2021-10-06', 50.00, 46),

       ('70', '28', '12', '2021-10-06', 20.00, 45),

       ('71', '29', '1', '2021-10-06', 2000.00, 8),

       ('72', '29', '2', '2021-10-06', 10.00, 57),

       ('73', '29', '3', '2021-10-06', 5000.00, 8),

       ('74', '30', '4', '2021-10-06', 6000.00, 3),

       ('75', '30', '5', '2021-10-06', 500.00, 33),

       ('76', '30', '6', '2021-10-06', 2000.00, 5),

       ('77', '31', '8', '2021-10-07', 600.00, 13),

       ('78', '31', '9', '2021-10-07', 1000.00, 43),

       ('79', '32', '10', '2021-10-07', 100.00, 24),

       ('80', '32', '11', '2021-10-07', 50.00, 30),

       ('81', '33', '1', '2021-10-07', 2000.00, 8),

       ('82', '33', '2', '2021-10-07', 10.00, 48),

       ('83', '33', '3', '2021-10-07', 5000.00, 5),

       ('84', '34', '4', '2021-10-07', 6000.00, 10),

       ('85', '34', '5', '2021-10-07', 500.00, 44),

       ('86', '34', '6', '2021-10-07', 2000.00, 3),

       ('87', '35', '8', '2020-10-08', 600.00, 25),

       ('88', '36', '10', '2020-10-08', 100.00, 57),

       ('89', '36', '11', '2020-10-08', 50.00, 44),

       ('90', '36', '12', '2020-10-08', 20.00, 56),

       ('91', '37', '1', '2020-10-08', 2000.00, 2),

       ('92', '37', '2', '2020-10-08', 10.00, 26),

       ('93', '37', '3', '2020-10-08', 5000.00, 1),

       ('94', '38', '6', '2020-10-08', 2000.00, 6),

       ('95', '39', '7', '2020-10-08', 100.00, 35),

       ('96', '39', '8', '2020-10-08', 600.00, 34),

       ('97', '40', '10', '2020-10-08', 100.00, 37),

       ('98', '40', '11', '2020-10-08', 50.00, 51),

       ('99', '40', '12', '2020-10-08', 20.00, 27);

--6.登录明细表

DROP TABLE IF EXISTS user_login_detail;

CREATE TABLE user_login_detail

(

    `user_id`    string comment '用户id',

    `ip_address` string comment 'ip地址',

    `login_ts`   string comment '登录时间',

    `logout_ts`  string comment '登出时间'

) COMMENT '用户登录明细表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

INSERT overwrite table user_login_detail

VALUES ('101', '180.149.130.161', '2021-09-21 08:00:00', '2021-09-27 08:30:00'),

       ('101', '180.149.130.161', '2021-09-27 08:00:00', '2021-09-27 08:30:00'),

       ('101', '180.149.130.161', '2021-09-28 09:00:00', '2021-09-28 09:10:00'),

       ('101', '180.149.130.161', '2021-09-29 13:30:00', '2021-09-29 13:50:00'),

       ('101', '180.149.130.161', '2021-09-30 20:00:00', '2021-09-30 20:10:00'),

       ('102', '120.245.11.2', '2021-09-22 09:00:00', '2021-09-27 09:30:00'),

       ('102', '120.245.11.2', '2021-10-01 08:00:00', '2021-10-01 08:30:00'),

       ('102', '180.149.130.174', '2021-10-01 07:50:00', '2021-10-01 08:20:00'),

       ('102', '120.245.11.2', '2021-10-02 08:00:00', '2021-10-02 08:30:00'),

       ('103', '27.184.97.3', '2021-09-23 10:00:00', '2021-09-27 10:30:00'),

       ('103', '27.184.97.3', '2021-10-03 07:50:00', '2021-10-03 09:20:00'),

       ('104', '27.184.97.34', '2021-09-24 11:00:00', '2021-09-27 11:30:00'),

       ('104', '27.184.97.34', '2021-10-03 07:50:00', '2021-10-03 08:20:00'),

       ('104', '27.184.97.34', '2021-10-03 08:50:00', '2021-10-03 10:20:00'),

       ('104', '120.245.11.89', '2021-10-03 08:40:00', '2021-10-03 10:30:00'),

       ('105', '119.180.192.212', '2021-10-04 09:10:00', '2021-10-04 09:30:00'),

       ('106', '119.180.192.66', '2021-10-04 08:40:00', '2021-10-04 10:30:00'),

       ('106', '119.180.192.66', '2021-10-05 21:50:00', '2021-10-05 22:40:00'),

       ('107', '219.134.104.7', '2021-09-25 12:00:00', '2021-09-27 12:30:00'),

       ('107', '219.134.104.7', '2021-10-05 22:00:00', '2021-10-05 23:00:00'),

       ('107', '219.134.104.7', '2021-10-06 09:10:00', '2021-10-06 10:20:00'),

       ('107', '27.184.97.46', '2021-10-06 09:00:00', '2021-10-06 10:00:00'),

       ('108', '101.227.131.22', '2021-10-06 09:00:00', '2021-10-06 10:00:00'),

       ('108', '101.227.131.22', '2021-10-06 22:00:00', '2021-10-06 23:00:00'),

       ('109', '101.227.131.29', '2021-09-26 13:00:00', '2021-09-27 13:30:00'),

       ('109', '101.227.131.29', '2021-10-06 08:50:00', '2021-10-06 10:20:00'),

       ('109', '101.227.131.29', '2021-10-08 09:00:00', '2021-10-08 09:10:00'),

       ('1010', '119.180.192.10', '2021-09-27 14:00:00', '2021-09-27 14:30:00'),

       ('1010', '119.180.192.10', '2021-10-09 08:50:00', '2021-10-09 10:20:00');

--7.商品价格变更明细表

DROP TABLE IF EXISTS sku_price_modify_detail;

CREATE TABLE sku_price_modify_detail

(

    `sku_id`      string comment '商品id',

    `new_price`   decimal(16, 2) comment '更改后的价格',

    `change_date` string comment '变动日期'

) COMMENT '商品价格变更明细表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table sku_price_modify_detail

values ('1', 1900, '2021-09-25'),

       ('1', 2000, '2021-09-26'),

       ('2', 80, '2021-09-29'),

       ('2', 10, '2021-09-30'),

       ('3', 4999, '2021-09-25'),

       ('3', 5000, '2021-09-26'),

       ('4', 5600, '2021-09-26'),

       ('4', 6000, '2021-09-27'),

       ('5', 490, '2021-09-27'),

       ('5', 500, '2021-09-28'),

       ('6', 1988, '2021-09-30'),

       ('6', 2000, '2021-10-01'),

       ('7', 88, '2021-09-28'),

       ('7', 100, '2021-09-29'),

       ('8', 800, '2021-09-28'),

       ('8', 600, '2021-09-29'),

       ('9', 1100, '2021-09-27'),

       ('9', 1000, '2021-09-28'),

       ('10', 90, '2021-10-01'),

       ('10', 100, '2021-10-02'),

       ('11', 66, '2021-10-01'),

       ('11', 50, '2021-10-02'),

       ('12', 35, '2021-09-28'),

       ('12', 20, '2021-09-29');

--8.配送信息表

DROP TABLE IF EXISTS delivery_info;

CREATE TABLE delivery_info

(  `delivery_id` string comment '配送单id',

    `order_id`    string comment '订单id',

    `user_id`     string comment '用户id',

    `order_date`  string comment '下单日期',

    `custom_date` string comment '期望配送日期'

) COMMENT '邮寄信息表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table delivery_info

values ('1', '1', '101', '2021-09-27', '2021-09-29'),

       ('2', '2', '101', '2021-09-28', '2021-09-28'),

       ('3', '3', '101', '2021-09-29', '2021-09-30'),

       ('4', '4', '101', '2021-09-30', '2021-10-01'),

       ('5', '5', '102', '2021-10-01', '2021-10-01'),

       ('6', '6', '102', '2021-10-01', '2021-10-01'),

       ('7', '7', '102', '2021-10-01', '2021-10-03'),

       ('8', '8', '102', '2021-10-02', '2021-10-02'),

       ('9', '9', '103', '2021-10-02', '2021-10-03'),

       ('10', '10', '103', '2021-10-02', '2021-10-04'),

       ('11', '11', '103', '2021-10-02', '2021-10-02'),

       ('12', '12', '103', '2021-10-03', '2021-10-03'),

       ('13', '13', '104', '2021-10-03', '2021-10-04'),

       ('14', '14', '104', '2021-10-03', '2021-10-04'),

       ('15', '15', '104', '2021-10-03', '2021-10-03'),

       ('16', '16', '104', '2021-10-03', '2021-10-03'),

       ('17', '17', '105', '2021-10-04', '2021-10-04'),

       ('18', '18', '105', '2021-10-04', '2021-10-06'),

       ('19', '19', '105', '2021-10-04', '2021-10-06'),

       ('20', '20', '105', '2021-10-04', '2021-10-04'),

       ('21', '21', '106', '2021-10-04', '2021-10-04'),

       ('22', '22', '106', '2021-10-05', '2021-10-05'),

       ('23', '23', '106', '2021-10-05', '2021-10-05'),

       ('24', '24', '106', '2021-10-05', '2021-10-07'),

       ('25', '25', '107', '2021-10-05', '2021-10-05'),

       ('26', '26', '107', '2021-10-05', '2021-10-06'),

       ('27', '27', '107', '2021-10-06', '2021-10-06'),

       ('28', '28', '107', '2021-10-06', '2021-10-07'),

       ('29', '29', '108', '2021-10-06', '2021-10-06'),

       ('30', '30', '108', '2021-10-06', '2021-10-06'),

       ('31', '31', '108', '2021-10-07', '2021-10-09'),

       ('32', '32', '108', '2021-10-07', '2021-10-09'),

       ('33', '33', '109', '2021-10-07', '2021-10-08'),

       ('34', '34', '109', '2021-10-07', '2021-10-08'),

       ('35', '35', '109', '2021-10-08', '2021-10-10'),

       ('36', '36', '109', '2021-10-08', '2021-10-09'),

       ('37', '37', '1010', '2021-10-08', '2021-10-10'),

       ('38', '38', '1010', '2021-10-08', '2021-10-10'),

       ('39', '39', '1010', '2021-10-08', '2021-10-09'),

       ('40', '40', '1010', '2021-10-08', '2021-10-09');

--9.好友关系表

--注:表中一行数据中的两个user_id,表示两个用户互为好友。

DROP TABLE IF EXISTS friendship_info;

CREATE TABLE friendship_info(

    `user1_id` string comment '用户1id',

    `user2_id` string comment '用户2id'

) COMMENT '用户关系表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table friendship_info

values ('101', '1010'),

       ('101', '108'),

       ('101', '106'),

       ('101', '104'),

       ('101', '102'),

       ('102', '1010'),

       ('102', '108'),

       ('102', '106'),

       ('102', '104'),

       ('102', '102'),

       ('103', '1010'),

       ('103', '108'),

       ('103', '106'),

       ('103', '104'),

       ('103', '102'),

       ('104', '1010'),

       ('104', '108'),

       ('104', '106'),

       ('104', '104'),

       ('104', '102'),

       ('105', '1010'),

       ('105', '108'),

       ('105', '106'),

       ('105', '104'),

       ('105', '102'),

       ('106', '1010'),

       ('106', '108'),

       ('106', '106'),

       ('106', '104'),

       ('106', '102'),

       ('107', '1010'),

       ('107', '108'),

       ('107', '106'),

       ('107', '104'),

       ('107', '102'),

       ('108', '1010'),

       ('108', '108'),

       ('108', '106'),

       ('108', '104'),

       ('108', '102'),

       ('109', '1010'),

       ('109', '108'),

       ('109', '106'),

       ('109', '104'),

       ('109', '102'),

       ('1010', '1010'),

       ('1010', '108'),

       ('1010', '106'),

       ('1010', '104'),

       ('1010', '102');

--10.收藏信息表

DROP TABLE IF EXISTS favor_info;

CREATE TABLE favor_info

(   `user_id`     string comment '用户id',

    `sku_id`      string comment '商品id',

    `create_date` string comment '收藏日期'

) COMMENT '用户收藏表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table favor_info

values ('101', '3', '2021-09-23'),

       ('101', '12', '2021-09-23'),

       ('101', '6', '2021-09-25'),

       ('101', '10', '2021-09-21'),

       ('101', '5', '2021-09-25'),

       ('102', '1', '2021-09-24'),

       ('102', '2', '2021-09-24'),

       ('102', '8', '2021-09-23'),

       ('102', '12', '2021-09-22'),

       ('102', '11', '2021-09-23'),

       ('102', '9', '2021-09-25'),

       ('102', '4', '2021-09-25'),

       ('102', '6', '2021-09-23'),

       ('102', '7', '2021-09-26'),

       ('103', '8', '2021-09-24'),

       ('103', '5', '2021-09-25'),

       ('103', '6', '2021-09-26'),

       ('103', '12', '2021-09-27'),

       ('103', '7', '2021-09-25'),

       ('103', '10', '2021-09-25'),

       ('103', '4', '2021-09-24'),

       ('103', '11', '2021-09-25'),

       ('103', '3', '2021-09-27'),

       ('104', '9', '2021-09-28'),

       ('104', '7', '2021-09-28'),

       ('104', '8', '2021-09-25'),

       ('104', '3', '2021-09-28'),

       ('104', '11', '2021-09-25'),

       ('104', '6', '2021-09-25'),

       ('104', '12', '2021-09-28'),

       ('105', '8', '2021-10-08'),

       ('105', '9', '2021-10-07'),

       ('105', '7', '2021-10-07'),

       ('105', '11', '2021-10-06'),

       ('105', '5', '2021-10-07'),

       ('105', '4', '2021-10-05'),

       ('105', '10', '2021-10-07'),

       ('106', '12', '2021-10-08'),

       ('106', '1', '2021-10-08'),

       ('106', '4', '2021-10-04'),

       ('106', '5', '2021-10-08'),

       ('106', '2', '2021-10-04'),

       ('106', '6', '2021-10-04'),

       ('106', '7', '2021-10-08'),

       ('107', '5', '2021-09-29'),

       ('107', '3', '2021-09-28'),

       ('107', '10', '2021-09-27'),

       ('108', '9', '2021-10-08'),

       ('108', '3', '2021-10-10'),

       ('108', '8', '2021-10-10'),

       ('108', '10', '2021-10-07'),

       ('108', '11', '2021-10-07'),

       ('109', '2', '2021-09-27'),

       ('109', '4', '2021-09-29'),

       ('109', '5', '2021-09-29'),

       ('109', '9', '2021-09-30'),

       ('109', '8', '2021-09-26'),

       ('1010', '2', '2021-09-29'),

       ('1010', '9', '2021-09-29'),

       ('1010', '1', '2021-10-01');

 二、练习题

1. 查询累积销量排名第二的商品

题目需求查询订单明细表(order_detail)中销量(下单件数)排名第二的sku_id,如果不存在返回null,如果存在多个排名第二的商品则需要全部返回。期望结果如下:

分析

代码分析

  1. 按 sku_id分组求和

 Drop view if exists v_1;

Create view v_1 as select sku_id,sum(sku_num) order_num from order_detail  group by sku_id;

Select * from v_1;

  1. 根据order_num 求名次

 Drop view if exists v_2;

Create view v_1 as  select  sku_id,order_num,dense_rank()  over(order by order_num)rk from v_1;

注意:(1)dense_rank() over(order by sku_num desc)中over() 没有partition by 字段

根据整个记录集范围内按销量进行排序,有可能存在并列情况

考虑有并列情况

(2) 以下排序函数,若第3名有并列情况,在并列名次区别

row_number() over(order by sku_num desc)

  1,2,3,4,5

rnak()dense_rank() over(order by sku_num desc)

1,2,3,3,4

dense_rank() over(order by sku_num desc)

1,2,3,3,5

  1. 显示结果

Select sku_id  from v_1  where rk=2;

代码实现

hive>

select sku_id

from ( select sku_id

         from ( select sku_id,order_num,dense_rank() over (order by order_num desc) rk

                  from (

                     select sku_id,sum(sku_num) order_num from order_detail   group by sku_id

                       ) t1

              ) t2

         where rk = 2

     ) t3

         right join --为保证,没有第二名的情况下,返回null

     ( select 1 ) t4;

2. 查询至少连续三天下单用户

 题目需求  查询订单信息表(order_info)中最少连续3天下单的用户id,期望结果如下:

   分析:

    本题重点   user_id,create_date

(1)分组并去重 ( user_id, create_date相同的只有一条记录)

  Drop view if exists v_1;

 Create view v_1 as select distinct  user_id, create_date from order_info  

   

(2)根据user_id 分组,按create_date字段排序

 Drop view if exists v_2;

create view  v_2 as  

select user_id, create_date, date_sub(create_date, row_number() over (partition by user_id order by create_date)) flag from v_1;

(3)

  Select user_id  from v_2  group by user_id,flag having count(flag)>=3;

   结果如下:

   

代码:

hive>

select distinct user_id

from (

         select user_id

         from ( select user_id, create_date, date_sub(create_date, row_number() over (partition by user_id order by create_date)) flag

                  from ( select user_id, create_date from order_info  group by user_id, create_date

                       ) t1 --同一天同一个用户可以下单多次,所以要分组 同一天可能多个用户下单,进行去重

              ) t2 -- 判断一串日期是否连续:若连续,用这个日期减去它的排名,会得到一个相同的结果

         group by user_id, flag

         having count(flag) >= 3 -- 连续下单大于等于三天

     ) t3;

3. 查询各品类销售商品的种类数及销量最高的商品

题目需求:从订单明细表(order_detail)统计各品类销售出的商品种类数累积销量最好的商品,期望结果如下:

category_id

category_name

sku_id

name

order_num

sku_cnt

1

数码

2

手机壳

302

4

2

厨卫

8

微波炉

253

4

3

户外

12

遮阳伞

349

4

category_id(分类id)

category_name(分类名称)

sku_id(销量最好的商品id)

name(商品名称)

order_num(销量最好的商品销量)

sku_cnt(商品种类数量)

本题题义

   显示6个字段 连接三个表,

       其中order_num是同一商品销量

ku_cnt同一类别商品类别

  1. 统计各商品销售数量

Drop view if exists v_1;

Create view v_1  as select sku_id, sum(sku_num) order_num  from order_detail  group by sku_id;

Select *  v_1;

  1. 三表内接

Drop view if exists v_2;

Create view v_2 as  Select cate.category_id,cate.category_name ,v.sku_id,sku.name,v.order_num from v_1 v

join sku_info sku

join category_info cate on  sku.sku_id=v.sku_id and cate.category_id=sku.category_id

  1. 统计商品类别数量,根据商品类别 进行销售数量排序

Drop view if exists v_3;

Create view v_3 as Select category_id,category_name,sku_id,name,order_num,

rank() over(partition by category_id order by order_num desc) rk,

count(*) over(partition by category_id) s

From  v_2

4查询结果

 Select * from v_3  where rk=1;

 代码:            

hive>

select category_id,

       category_name,

       sku_id,

       name,

       order_num,

       sku_cnt

from (  select od.sku_id,

                sku.name,

                sku.category_id,

                cate.category_name,

                order_num,

                rank() over (partition by sku.category_id order by order_num desc) rk,

                count(od.sku_id) over (partition by sku.category_id) sku_cnt

           from ( select sku_id, sum(sku_num) order_num  from order_detail  group by sku_id ) od

           left join   sku_info sku  on od.sku_id = sku.sku_id

           left join   category_info cate   on sku.category_id = cate.category_id ) t1

where rk = 1;

4.查询用户的累计消费金额及VIP等级

题目需求

从订单信息表(order_info)中统计每个用户截止其每个下单日期的累积消费金额,以及每个用户在其每个下单日期的VIP等级。用户vip等级根据累积消费金额计算,计算规则如下:

设累积消费总额为X,

若0=<X<10000,则vip等级为普通会员

若10000<=X<30000,则vip等级为青铜会员

若30000<=X<50000,则vip等级为白银会员

若50000<=X<80000,则vip为黄金会员

若80000<=X<100000,则vip等级为白金会员

若X>=100000,则vip等级为钻石会员

期望结果如下:

user_id(用户id)  create_date(下单日期) sum_so_far

(截至每个下单日期的累计下单金额)

vip_level(每个下单日期的VIP等级)

101   2021-09-27   29000.00  青铜会员

101   2021-09-28   99500.00  白金会员

101   2021-09-29   142800.00  钻石会员

101   2021-09-30   143660.00  钻石会员

102   2021-10-01   171680.00  钻石会员

102    2021-10-02  177850.00  钻石会员

103    2021-10-02  69980.00   黄金会员

103    2021-10-03  75890.00  黄金会员

104    2021-10-03  89880.00  白金会员

105   2021-10-04   120100.00  钻石会员

106   2021-10-04    9390.00    普通会员

106   2021-10-05  119150.00  钻石会员

107   2021-10-05   69850.00  黄金会员

107  2021-10-06    124150.00 钻石会员

108  2021-10-06   101070.00钻石会员

108   2021-10-07  155770.00 钻石会员

109  2021-10-07  129480.00钻石会员

109   2021-10-08  153500.00钻石会员

1010   2021-10-08  51950.00  黄金会员

分析:

  1. 用户每个下单日期金额  根据user_id、create_date分组求和

   下次日期金额采用表 order.info字段total_amount

        同一单中 表order.detail中 price*sku_num和与order.info字段total_amount相等。

  Drop view if exists v_1;

Create view v_1 as select user_id,create_date,sum(total_amount)s from order_info group by   user_id,create_date;

Select * from v_1;

  1. 累计求和

 Drop view if exists v_2;

Create view v_2  as  select *,sum(s) over(partition by user_id order by create_date) sum_so_far

From  v_1;

  1. 查询结果

 Select user_id,create_date, sum_so_far,

      case  when sum_so_far >= 100000 then '钻石会员'

            when sum_so_far >= 80000 then '白金会员'

            when sum_so_far >= 50000 then '黄金会员'

            when sum_so_far >= 30000 then '白银会员'

            when sum_so_far >= 10000 then '青铜会员'

            when sum_so_far >= 0 then '普通会员'

        end vip_level

      From v_2;

注意:在查询视图时中文字段出现????

处理办法:

在MySQL中执行下面的两句修改命令,将编码修改为utf8

#mysql -uroot -p

Mysql>use hive ;

 mysql> ALTER TABLE `TBLS` MODIFY COLUMN VIEW_EXPANDED_TEXT mediumtext CHARACTER SET utf8;

mysql> ALTER TABLE `TBLS` MODIFY COLUMN VIEW_ORIGINAL_TEXT mediumtext CHARACTER SET utf8;

代码实现

hive>

select user_id,

       create_date,

       sum_so_far,

       case

           when sum_so_far >= 100000 then '钻石会员'

           when sum_so_far >= 80000 then '白金会员'

           when sum_so_far >= 50000 then '黄金会员'

           when sum_so_far >= 30000 then '白银会员'

           when sum_so_far >= 10000 then '青铜会员'

           when sum_so_far >= 0 then '普通会员'

           end vip_level

from (

         select user_id,

                create_date,

                sum(total_amount_per_day) over (partition by user_id order by create_date) sum_so_far

         from (

                  select user_id,

                         create_date,

                         sum(total_amount) total_amount_per_day

                  from order_info

                  group by user_id, create_date

              ) t1

     ) t2;

5. 查询首次下单后第二天连续下单的用户比率

题目需求

从订单信息表(order_info)中查询首次下单后第二天仍然下单的用户占所有下单用户的比例,结果保留一位小数,使用百分数显示,期望结果如下:

percentage

60.0%

分析

分析

  1. 取两字段并去重

   Drop view if exists v_1;

   Create view  v_1  as select user_id, create_date  from order_info  group by user_id, create_date

  1. 根据user_id分组,  按create_date排名次,为下一步,保证每一分组至少有两条记录

 Drop view if exists v_2;

   Create view  v_2  as  select user_id, create_date, rank() over (partition by user_id order by create_date) rk

               From  v_1

  1. 每组最多取两个记录 ,每组中取一个最大,最小

Drop view if exists v_3;

Create view v_3  as select user_id, min(create_date) buy_date_first, max(create_date) buy_date_second  from v_2  where rk<=2 group by user_id

(4)

select concat(round(sum(if(datediff(buy_date_second, buy_date_first) = 1, 1, 0)) / count(*) * 100, 1), '%') percentage  from  v_3

注意:

 x= datediff(buy_date_second, buy_date_first) = buy_date_second- buy_date_first

  If(x = 1, 1, 0)  //如果=1

 统计相邻天下单用户数

sum(if(datediff(buy_date_second, buy_date_first) = 1, 1, 0))  相邻天下单+1 否则+0

Concat(a,b) 将a与b连接成字符串

代码实现

select concat(round(sum(if(datediff(buy_date_second, buy_date_first) = 1, 1, 0)) / count(*) * 100, 1), '%') percentage

from (select user_id, min(create_date) buy_date_first, max(create_date) buy_date_second

         from ( select user_id, create_date, rank() over (partition by user_id order by create_date) rk

               from ( select user_id, create_date  from order_info  group by user_id, create_date

                       ) t1

              ) t2

         where rk <= 2  group by user_id

     ) t3;

6.每个商品销售首年年份、销售数量和销售金额

题目需求:从订单明细表(order_detail)统计每个商品销售首年的年份,销售数量和销售总额。

期望结果如下:

sku_id(商品id)year(销售首年年份)order_num(首年销量)order_amount(首年销售金额)

分析

  1. 转换日期并按年求和

 Drop view if exists v_1;

Create view v_1  as select sku_id,year(create_date)year, sku_num,price,price*sku_num s  from order_detail ;

(2)求和

 Drop view if exists v_2;

Create view v_2  as select sku_id,year, sum(sku_num)sku_sum,sum(s)salary  from v_1  group by  sku_id,year;

(3) 排序

   Drop view if exists v_3;

Create view v_3  as select sku_id,year,sku_sum,salary,rank() over(partition by sku_id order by year)rk from v_2

  1. 查询

 Select sku_id,year,sku_sum,salary from v_3 where rk=1;

代码

select sku_id,

       year(create_date) year,

       sum(sku_num) order_num,

       sum(price*sku_num) order_amount

from (    select order_id,

                sku_id,

                price,

                sku_num,

                create_date,

                rank() over (partition by sku_id order by year(create_date)) rk

         from order_detail

     ) t1

where rk = 1

group by sku_id,year(create_date);

7 筛选去年总销量小于100的商品

 题目需求:从订单明细表(order_detail)中筛选出去年总销量小于100的商品及其销量,假设今天的日期是2022-01-10,不考虑上架时间小于一个月(30天)的商品,期望结果如下:

 分析:

首先由生成记录集

Sku_id   sku_num  create_date   name   from_date(上架时间)

筛选记录  条件 year(create_date)=’2021’ and  from_date<date_sub(‘2022-01-10’,30)  

  1. 连接两表,生成需要字段,不考虑上架时间小于一个月的商品

 Drop view if exists v_1;

Create view v_1 as select t1.sku_id,t1.sku_num,t1.create_date,t2.name,t2.from_date  from order_detail t1 join sku_info t2  on t1.sku_id=t2.sku_id where datediff('2022-01-10',t2.from_date)>=30 and year(create_date)='2021'

  1. 销量求和,按条件查询

  Select sku_id,sum(sku_num) from v_1  group by sku_id  having sum(sku_num)<100;

   

8 查询每日新用户数

题目需求:从用户登录明细表(user_login_detail)中查询每天的新增用户数,若一个用户在某天登录了,且在这一天之前没登录过,则任务该用户为这一天的新增用户。期望结果如下:

login_date_first(日期)user_count(新增用户数)

分析:

(1)求每个用户最小日期

 select  user_id, min(date_format(login_ts,'yyyy-MM-dd')) login_date_first  from user_login_detail  group by user_id

  1. 先求每个用户首次登录的时间
  2. 首次登录时间分组统计用户的数量就是那一天新增用户数量

注意:由于login_ts字段值格式是,'yyyy-MM-dd  hh:mm:ss' 要格式化为,'yyyy-MM-dd'

代码:

select  login_date_first,  count(*) user_count

from(  select  user_id, min(date_format(login_ts,'yyyy-MM-dd'))  login_date_first

             from user_login_detail  group by user_id )t1

group by login_date_first;

9.统计每个商品销量最高的日期

题目需求:从订单明细表(order_detail)中统计出每种商品销售件数最多的日期及当日销量,如果有同一商品多日销量并列的情况,取其中的最小日期。

sku_id(商品id)  create_date(销量最高的日期)  sum_num(销量)

分析

  1. 统计每种商品每日销量

 Drop view if exists v_1;

Create view v_1 as   select sku_id, create_date, sum(sku_num) sum_num

                  from order_detail  group by sku_id, create_date

  1. 销量排序

Drop view if exists v_2;

Create view v_2 as   select sku_id,  create_date,  sum_num,

                row_number() over (partition by sku_id order by sum_num desc, create_date asc) rn

         from v_1

 注意  如果有同一商品多日销量并列的情况,取其中的最小日期 是这条语句实现

row_number() over (partition by sku_id order by sum_num desc,create_date asc) rn

                                     按销量逆序          日期顺序排序

所以表中第一记录是销量最大,每天销量  

  1. 查询结果

 Select  * from v_2 wherern=1;

代码:

select sku_id,

       create_date,

       sum_num

from (

         select sku_id,

                create_date,

                sum_num,

                row_number() over (partition by sku_id order by sum_num desc,create_date asc) rn

         from (

                  select sku_id,

                         create_date,

                         sum(sku_num) sum_num

                  from order_detail

                  group by sku_id, create_date

              ) t1

     ) t2

where rn = 1;

     

10 查询销售件数高于品类平均数的商品

题目需求 从订单明细表(order_detail)中查询累积销售件数高于其所属品类平均数的商品。

分析

 每种商品销售件数高于品类(category_id)平均数

   显示sku_id  name   sum_num cate_avg_num

   本题关键是某种商品品类平均数求法

      每种商品都属于某一品类 ,每一品类都 有一个category_id, 品类平均数是将品类所有商品销量和除以这类商品数量。

分析

  1. 统计每种商品销量和

  Drop view if exists v_1;

  Create view  v_1 as select sku_id,sum(sku_num)sum_num from order_detail group by sku_id ;

  Select * from v_1;

  1. 统计每种商品按品种类型求平均销售数

  Create view v_2 as select t1.sku_id,t2.category_id,t2.name,t1.sum_num,

                         Avg(t1.sum_num) over(partition by t2.category_id) cate_avg_num

From v_1 t1

Left join sku_info t2  on t1.sku_id=t2.sku_id

  1. 查询sum_num>cate_avg_num记录

代码:

select sku_id,

       name,

       sum_num,

       cate_avg_num

from ( select od.sku_id, category_id,name, sum_num,

                avg(sum_num) over (partition by category_id ) cate_avg_num

         from ( select sku_id, sum(sku_num) sum_num from order_detail  group by sku_id

              ) od

              left join sku_info sku

              on od.sku_id = sku.sku_id) t1

where sum_num > cate_avg_num;

11 用户注册、登录、下单综合统计

题目需求从用户登录明细表(user_login_detail)和订单信息表(order_info)中查询每个用户的注册日期(首次登录日期)、总登录次数以及其在2021年的登录次数、订单数和订单总额。期望结果如下:

user_id(用户id)register_date(注册日期) total_login_count(累积登录次数) login_count_2021(2021年登录次数)

order_count_2021(2021年下单次数)    order_amount_2021(2021年订单金额)

分析

  1.  此题从表user_login_detail找出用户的最早最早注册日期,统计登录次数,2021登录次数
  2. order_info表找出 2021年订单数和订单额
  3.  两个查询结果join

(1)找出 user_login_detail中最早注册日期,统计登录次数,2021登录次数

Drop view if exists v_1;

Create view v_1 as  select user_id,

            min(date_format(login_ts, 'yyyy-MM-dd')) register_date,

           count(1) total_login_count,

           count(if(year(login_ts) = '2021', 1, null)) login_count_2021

            from user_login_detail

            group by user_id

(2)查询用户2021销售当量及销售额

Drop view if exists v_2;

Create view v_2 as  select user_id,

               count(order_id) order_count_2021,

               sum(total_amount) order_amount_2021

         from order_info

         where year(create_date) = '2021'

         group by user_id

代码:

Select login.user_id,register_date,total_login_count,login_count_2021,order_count_2021, order_amount_2021

from (  select user_id,

            min(date_format(login_ts, 'yyyy-MM-dd')) register_date,

            count(1) total_login_count,

           count(if(year(login_ts) = '2021', 1, null)) login_count_2021

         from user_login_detail

         group by user_id

     ) login

         join

     (

         select user_id,

               count(order_id) order_count_2021,

               sum(total_amount) order_amount_2021

         from order_info

         where year(create_date) = '2021'

         group by user_id

     ) oi

     on login.user_id = oi.user_id;

12 查询指定日期的全部商品价格

 题目需求:从商品价格修改明细表(sku_price_modify_detail)中查询截至到2021年10月01号的最新商品价格,假设所有商品初始价格默认都是99。期望结果如下:

sku_id(商品id)price(商品价格)

1               2000.00

2               10.00

3               5000.00

4               6000.00

5               500.00

6               2000.00

7               100.00

分析:

分析

表结构分析 sku_info、 sku_price_modify_detail

sku_info 商品表

sku_price_modify_detail  商品价格变化表

题意:根据表sku_price_modify_detail表2021-10-01最新价格,查询sku_info表中商品价格,若 商品价格没变动为默认值99

1找出2021商品最新价格

DROP view  IF EXISTS v_1;

   Create view v_1 as  select sku_id, new_price, change_date,

                         row_number() over (partition by sku_id order by change_date desc) rn

                  from sku_price_modify_detail

                  where change_date <= '2021-10-01'

注意, row_number() over (partition by sku_id order by change_date desc) rn  根据商品ID号分组,每组按 change_date 逆序排名 ,第一名为最新日期的价格。

(3)根据表 sku_info确定商品新价格

select t1.sku_id, nvl(new_price, 99) price

from sku_info t1

         left join (select sku_id, new_price from v_1 where rn=1)t2

           on t1.sku_id=t2.sku_id

  

注意:商品价格没变动就是将表sku_info 与v_1 左连后 字段值new_price为空,否则为变化值

nvl(new_price, 99) :如果new_price字段值为null,那么返回99,如果不为null,返回new_price本身值

代码:

select sku_info.sku_id,

       nvl(new_price, 99) price

from sku_info

         left join

     (   select sku_id, new_price

         from ( select sku_id, new_price, change_date,

                         row_number() over (partition by sku_id order by change_date desc) rn

                  from sku_price_modify_detail

                  where change_date <= '2021-10-01'

              ) t1

         where rn = 1

     ) t2

     on sku_info.sku_id = t2.sku_id;

13 即时订单比例

题目需求:订单配送中,如果期望配送日期和下单日期相同,称为即时订单,如果期望配送日期和下单日期不同,称为计划订单。

请从配送信息表(delivery_info)中求出每个用户的首单(用户的第一个订单)中即时订单的比例,保留两位小数,以小数形式显示。期望结果如下:

percentage

0.6

分析:delivery_info结构

 `delivery_id  string comment '配送单id',

    order_id    string comment '订单id',

    user_id    string comment '用户id',

    order_date  string comment '下单日期',

    custom_date  string comment '期望配送日期'

Drop view if exists v_1;

 Create view v_1 as select  delivery_id,  user_id,  order_date, custom_date,

        row_number() over (partition by user_id order by order_date) rn

from delivery_info;

注意:排序是为了获得首单

Select * from v_1;

  1. 找出rn=1的记录,再统计 所有记录数、order_date=custom_date记录数

     select

       round(sum(if(order_date=custom_date,1,0))/count(*),2) percentage

     From v_1   where rn=1;

  Round(,) ---取小数位数

Sum() 对1或0求和 统计日期相等记录数

  if(order_date=custom_date,1,0)  日期相等 为1 ,否则为0

Count(*)所有记录数

代码:

select

    round(sum(if(order_date=custom_date,1,0))/count(*),2) percentage

from

(    select  delivery_id,  user_id,  order_date, custom_date,

        row_number() over (partition by user_id order by order_date) rn

    from delivery_info

)t1

where rn=1;

2.14 向用户推荐朋友收藏的商品

题目需求:现需要请向所有用户推荐其朋友收藏 但是用户自己未收藏的商品,请从好友关系表(friendship_info)和收藏表(favor_info)中查询出应向哪位用户推荐哪些商品。期望结果如下:

部分结果展示

user_id(用户id)sku_id(应向该用户推荐的商品id)

101           2

101           4

101           7

101           9

101           8

101          11

101          1

分析:

friendship_info   用户关系表'    

user1_id(用户)       user2_id(用户朋友)

 favor_info 爱好表

user_id (用户id) sku_id(商品id) create_date (收藏日期) 

解答本题关键是 找出自己的朋友收藏商品号

               找出自己收获商品号

 然后求这两个集中差集

代码:

   --  (A except B : 返回仅出现在A中的记录。注: except 时两个表的字段名、字段顺序要一致)

--用户user1_id 的朋友收藏的商品sku_id

select  t.user1_id as user_id, sku_id

from friendship_info t join favor_info t1    on t.user2_id =t1.user_id           

except --取补集(差集)

-- 2)自己收藏的商品

select user_id, sku_id  from favor_info;

15. 查询所有用户的连续登录两天及以上的日期区间

题目需求  从登录明细表(user_login_detail)中查询出,所有用户的连续登录两天及以上的日期区间,以登录时间(login_ts)为准。期望结果如下:

user_id(用户id)  start_date(开始日期)   end_date(结束日期)

101   2021-09-27   2021-09-30

102   2021-10-01   2021-10-02

106   2021-10-04   2021-10-05

107   2021-10-05   2021-10-06

分析:

分析:

  1. 表结构

   Desc  user_login_detail;

   

  1.  变换日期格式 去重(分组每天只有一个记录)

Drop view if exists v_1;

      Create view v_1 as select user_id,date_format(login_ts,'yyyy-MM-dd') login_date from user_login_detail group by  user_id,date_format(login_ts,'yyyy-MM-dd');

注:user_id,date_format(login_ts,'yyyy-MM-dd') 去重

3根据用户分组,按日期排序 ,为确定连续下单准备

drop view if exists v_2;

create view v_2 as select user_id,login_date, row_number() over (partition by user_id order by login_date) rn  from  v_1;

  1. 得判断下单连续日期flag

drop view if exists v_3;

create view v_3 as select user_id,login_date, date_sub(login_date, rn) flag  from v_2;

注意将日期排名次 ,然后与名次相减 日期连续的,flag都相同

4在连续下单客户中找出最早,最迟下单日期

select user_id,

       min(login_date) start_date,

       max(login_date) end_date

From v_3 group by user_id,flag  having count(*)>=2

注意这里涉及一个算法

  统计flag次数 >=2,说明日期有连续的。

代码:

select user_id,

       min(login_date) start_date,

       max(login_date) end_date

from (

         select user_id,

                login_date,

                date_sub(login_date, rn) flag

         from (

                  select user_id,

                         login_date,

                         row_number() over (partition by user_id order by login_date) rn

                  from (

                           select user_id,

                                  date_format(login_ts, 'yyyy-MM-dd') login_date

                           from user_login_detail  group by user_id, date_format(login_ts, 'yyyy-MM-dd')

                       ) t1

              ) t2

     ) t3

group by user_id, flag

having count(*) >= 2;

16 男性和女性每日购物总金额统计

 题目需求 从订单信息表(order_info)和用户信息表(user_info)中,分别统计每天男性和女性用户的订单总金额,如果当天男性或者女性没有购物,则统计结果为0。期望结果如下:

create_date(日期)total_amount_male(男性用户总金额)total_amount_female(女性用户总金额)

2021-09-27         29000.00                             0.00

2021-09-28          70500.00                             0.00

2021-09-29          43300.00                             0.00

2021-09-30          860.00                               0.00

2021-10-01          0.00                                 171680.00

select create_date,

       sum(if(gender = '男', total_amount, 0)) total_amount_male,

       sum(if(gender = '女', total_amount, 0)) total_amount_female

from order_info oi

         left join

     user_info ui

     on oi.user_id = ui.user_id

group by create_date;

分析

表结构

Desc order_info;

Desc user_info

需要两个表:order_info表和user_info表

左连接--->根据下单日期分组--->通过if函数判断是男是女--->sum来统计总数

17 订单金额趋势分析

题目需求:查询截止每天的最近3天内的订单金额总和以及订单金额日平均值,保留两位小数,四舍五入。期望结果如下:

create_date(日期) total_3d(最近3日订单金额总和) avg_ad(最近3日订单金额日平均值)

2021-09-27  29000.00   29000.00  

2021-09-28   99500.00   49750.00

2021-09-29   142800.00  47600.00

2021-09-30   114660.00   38220.00

2021-10-01    215840.00   71946.67

2021-10-02   248690.00   82896.67

2021-10-03  343620.00    114540.00

2021-10-04   301430.00   100476.67

分析:

  1. 统计每日订单销售额

   Drop view if exists v_1;

    Create view v_1 as  select create_date, sum(total_amount) total_amount_by_day

                      from order_info   group by create_date;

  

(2)统计近三天销售量及平均销售量

select create_date,

       sum(total_amount_by_day) over (order by create_date rows between 2 preceding and current row ) total_3d,

       round(avg(total_amount_by_day) over (order by create_date rows between 2 preceding and current row ), 2) avg_3d

From  v_1

sum(total_amount_by_day) over (order by create_date  rows between 2 preceding and current row )

每行和是三行之和

avg(total_amount_by_day) over (order by create_date rows between 2 preceding and current row )

求和及平均

注意: order by create_date   按日期排序,

       rows between 2 preceding and current row  范围:当前行及前二行

select create_date,

       round(sum(total_amount_by_day) over (order by create_date rows between 2 preceding and current row ),2) total_3d,

       round(avg(total_amount_by_day) over (order by create_date rows between 2 preceding and current row ), 2) avg_3d

from (  select create_date, sum(total_amount) total_amount_by_day

         from order_info   group by create_date

     ) t1;

18 购买过商品1和商品2但是没有购买商品3的顾客

题目需求 从订单明细表(order_detail)中查询出所有购买过商品1和商品2,但是没有购买过商品3的用户,

 分析

结构

order_detail表

     

order_info表

1根据用户分组,每个用户买过商品合并成一个集合skus

    Drop view if exists v_1;

create  view v_1 as  select user_id, collect_set(sku_id) skus

         from order_detail od  left join  order_info oi on od.order_id = oi.order_id

         group by user_id

  1. 找出含商品1,2,3客户

select user_id from v_1  where array_contains(skus ‘1’)

                          And array_contains(skus ‘2’)

                         And !array_contains(skus ‘3’)

   本题主要是两个函数应用:

      collect_set()将一个组的元素合并成一个集合

      Array_contains(,)判断集合是否包含某种元素

select user_id

from (  select user_id, collect_set(sku_id) skus

         from order_detail od

                  left join

              order_info oi

              on od.order_id = oi.order_id

         group by user_id

     ) t1

where array_contains(skus, '1')

  and array_contains(skus, '2')

  and !array_contains(skus, '3');

分析

19 统计每日商品1和商品2销量的差值

题目需求:从订单明细表(order_detail)中统计每天商品1和商品2销量(件数)的差值(商品1销量-商品2销量),期望结果如下:

create_date    diff

2021-09-27     2

2021-10-01    -10

分析

  1. 求每天每个商品销售和

Drop view if exists v_1;

 Create view v_1 as Select sku_id,create_date, sum(sku_num)s from order_detail where sku_id='1' or sku_id='2'  group by sku_id,create_date;

  

select create_date,

       sum(if(sku_id = '1', sku_num, 0)) - sum(if(sku_id = '2', sku_num, 0)) diff

from order_detail  group by create_date;

注意首先要找出符合条件suk_id=’1’ 和suk_id=’2’ order_detail表

如果没有条件就会出现0记录

20 查询出每个用户的最近三笔订单

题目需求:从订单信息表(order_info)中查询出每个用户的最近三笔订单,期望结果如下:

user_id  order_id    create_date

101      2         2021-09-28

101      3         2021-09-29

101      4         2021-09-30

102      5        2021-10-01

select user_id,

       order_id,

       create_date

from (   select user_id , order_id  , create_date

         , row_number() over (partition by user_id order by create_date desc) rk

         from order_info

     ) t1

where rk <= 3;

21查询每个用户登录日期的最大空档期

题目需求  从登录明细表(user_login_detail)中查询每个用户两个登录日期(以login_ts为准)之间的最大的空档期。统计最大空档期时,用户最后一次登录至今的空档也要考虑在内,假设今天为2021-10-10。期望结果如下:

user_id(用户id) max_diff(最大空档期)

101               10

102               9

103               10

104               9

105               6

106               5

107              10

 分析

(1)日期格式化,并去重(每个客户,每天登录时间只有一个)

 Drop view if exists v_1;

 Create view v_1 as select  user_id,

                date_format(login_ts,'yyyy-MM-dd') login_date

                from user_login_detail

                group by user_id,date_format(login_ts,'yyyy-MM-dd');

(2) 取得下一行日期

 Drop view if exists v_2;

  Create view v_2 as  select

            user_id,

            login_date,

            lead(login_date,1,'2021-10-10') over(partition by user_id order by login_date) next_login_date

        From  v_1;

注意窗口函数 lead(login_date,1,'2021-10-10')   取得当前行下一行日期,若下一行不存在,则为'2021-10-10'

(3)取得日期差值

Drop view if exists v_3;

  Create view v_3 as

select user_id, datediff(next_login_date,login_date) diff   From v_3

  注意函数datediff()取得两个日期差值

  (4)取差值每个用户最大值的记录

select  user_id, max(diff) max_diff   from v_3  group by user_id

代码:

select

    user_id,

    max(diff) max_diff

from

(

    select

        user_id,

        datediff(next_login_date,login_date) diff

    from

    (

        select

            user_id,

            login_date,

            lead(login_date,1,'2021-10-10') over(partition by user_id order by login_date) next_login_date

        from

        (

            select  user_id,

                date_format(login_ts,'yyyy-MM-dd') login_date

                from user_login_detail

               group by user_id,date_format(login_ts,'yyyy-MM-dd')

        )t1

    )t2

)t3

group by user_id;

22 查询相同时刻多地登陆的用户

题目需求:从登录明细表(user_login_detail)中查询在相同时刻,多地登陆(ip_address不同)的用户,期望结果如下:

user_id(用户id)

102

104

107

分析:

本题关键是要找出上次退出时间和IP

  1. 找出上次退出时间和ip

   Drop view if exists v_1;

Create view v_1 as select user_id, ip_address,login_ts,logout_ts,

                        -- 1) 计算上次登出时间(如果为null,则使用本次登录时间)

               lag(logout_ts, 1, login_ts) over (partition by user_id order by login_ts) last_logout_ts,

                           -- 2) 计算上次登录的ip地址(如果为null,则默认为 '000.000.000.000')

              lag(ip_address,1,'000.000.000.000') over (partition by user_id order by login_ts) last_ip_address

             from user_login_detail

注意:

 lag(logout_ts, 1, login_ts)  取当前记录前一条 如果存在,值为logout_ts 不存在取本条记录 login_ts值

  1. 查询结果

 select distinct user_id

from v_1   where login_ts < last_logout_ts  and ip_address != last_ip_address;

代码:

select distinct user_id

from (  select user_id, ip_address,login_ts,logout_ts,

        -- 1) 计算上次登出时间(如果为null,则使用本次登录时间)

       lag(logout_ts, 1, login_ts) over (partition by user_id order by login_ts) last_logout_ts,

       -- 2) 计算上次登录的ip地址(如果为null,则默认为 '000.000.000.000')

        lag(ip_address, 1, '000.000.000.000') over (partition by user_id order by login_ts) last_ip_address

         from user_login_detail

     ) t1

where login_ts < last_logout_ts  and ip_address != last_ip_address;

23 销售额完成任务指标的商品

题目需求:商家要求每个商品每个月需要售卖出一定的销售总额

假设1号商品销售总额大于21000,2号商品销售总额大于10000,其余商品没有要求

请写出SQL从订单详情表中(order_detail)查询连续两个月销售总额大于等于任务总额的商品

结果如下:

sku_id(商品id)

1

-- 求出1号商品 和 2号商品 每个月的购买总额 并过滤掉没有满足指标的商品

分析

  1. 求sku_id=’1’ sku_id=’2’ 每天销售额

   Drop view if exists  v_1;

Create view v_1 as select sku_id,create_date,sku_num*price day_sum  from order_detail where sku_id='1' or sku_id='2';

  

(1) 根据年月,按sku_i求和

Drop view if exists  v_2;

Create view v_2 as select date_format(create_date,'yyyy-MM')d,sku_id,sum(day_sum)mm_sum from v_1  group by date_format(create_date,'yyyy-MM'),sku_id;

(2) 计算上个月销售额

drop view if exists v_3;

   Create view v_3 as select sku_id, d,s,lag(mm_sum,1,0) over(partition by sku_id  order by d ) last_s1,  from v_2;

Select  *  from v_3;

    

  1. 从以上表知道,只要判断 sku_id=1 and s>,s1,s2

   Select  sku_id  from v_3  where  (sku_id='1' and mm_sum>21000 and last_s1>21000) or  (sku_id='2' and mm_sum>10000 and last_s1>10000);

  

24 根据商品销售情况进行商品分类

题目需求:从订单详情表中(order_detail)对 对商品进行分类,0-5000为冷门商品,5001-19999为一般商品,20000往上为热门商品,并求出不同类别商品的数量

结果如下:

Category(类型)   Cn(数量)

一般商品          1

冷门商品          10

热门商品          1

本题难点在于题目表述不清,容易造成理解不同

  (1)0-5000是销售额还是销售数量 ,以下统计数量

  (2)类别商品的数量 

是根据sku_id(商品)统计数量,还是根据sku_info表中字段category_id(商品类别)统计数量

以下统计商品数量而不是类别数量

分析

   表order_detail

  1. 统计商品销售件数

   Drop view  if exists v_1;

   Create view v_1 as select sku_id,sum(sku_num) s from order_detail group by sku_id;

Select  category,count(*) cn from

       (Select sku_id,

 case    when  s >=0 and s<=5000 then '冷门商品'

               when  s >=5001 and s<=19999 then '一般商品'

               when  s >=20000 then '热门商品'

       end  category

     from  v_1 )t1   group  by  category ;

   

参考代码

select

  t2.category,

  count(*) cn

from

  (

    select

      t1.sku_id,

      case

      when  t1.sku_sum >=0 and t1.sku_sum<=5000 then '冷门商品'

      when  t1.sku_sum >=5001 and t1.sku_sum<=19999 then '一般商品'

      when  t1.sku_sum >=20000 then '热门商品'

      end  category

    from

      (

        select

          sku_id,

          sum(sku_num)  sku_sum

        from

          order_detail

        group by

          sku_id

    )t1

)t2

group by

  t2.category

25.各品类销量前三的所有商品

题目需求:从订单详情表中(order_detail)和商品(sku_info)中查询各个品类销售数量前三商品。如果该品类小于三个商品,则输出所有的商品销量。

题义析:每个商品和,类别排名前三,且输出销量

Sku_id(商品id)Category_id(品类id)

2                    1

4                    1

1                    1

分析

表order_detail

表sku_info

(1)   求商品销量和 

    Drop view  if exists v_1;

    Create view v_1  as  select  sku_id,  sum(sku_num) sku_sum

                       from  order_detail   group by   sku_id;

(2)v_1 与表 sku_info左连,获得字段category_id

Drop view  if exists v_2;

Create view v_2  as  select t1.sku_id,t1.sku_sum,si.category_id  

from v_1 t1  join  sku_info si   on t1.sku_id=si.sku_id;

(3)根据销售额 sku_sum排序

   Drop view  if exists v_3;

Create view v_3  as  select sku_id,category_id,sku_sum,rank() over(partition by category_id  order by sku_sum desc)rk from v_2;

(4) 查询结果 

select stu_id,category_id,sku_sum  from v_3 where rk<=3;

代码:

select

  t2.sku_id,

  t2.category_id

from

  (

    select

      t1.sku_id,

      si.category_id,

    --商品类别排名

      rank()over(partition by category_id order by t1.sku_sum desc) rk

    from

      (  select

          sku_id,

          sum(sku_num) sku_sum

        from  order_detail

        group by   sku_id

    )t1

    join

      sku_info si

    on

      t1.sku_id=si.sku_id

    )t2

where   t2.rk<=3;

26.各品类中商品价格的中位数

题目需求:从商品信息表sku_info中求各分类商品价格的中位数。如果一个分类下商品个数是偶数则输出中间两个值的平均值,如果是奇数,则输出中间数即可。

分析

(1)按category_id分组,计算分组记录数cn,各记录在排名。

  Drop view if exists v_1;

Create view v_1 as  select sku_id, category_id, price,

                row_number() over (partition by category_id order by price desc) rn,

                count(*) over (partition by category_id)   cn,

                count(*) over (partition by category_id) % 2  flag

         from sku_info;

(2)计算分组记录数为偶数价格中数偶数中值

select distinct category_id, avg(price) over (partition by category_id) medprice

From v_1  where flag = 0  and (rn = cn / 2 or rn = cn / 2 + 1);

注意: where flag = 0  and (rn = cn / 2 or rn = cn / 2 + 1) 按条件取出两条记录

(3)计算分组记录数为奇数的价格中数

   select  category_id, price  medprice

From v_1  where flag = 1  and  rn = round(cn / 2) ;  四舍五入,rn一定是中间数

  1. 两种计算进行union

完整代码:

select distinct category_id, avg(price) over (partition by category_id) medprice

from (        select sku_id, category_id, price,

                row_number() over (partition by category_id order by price desc) rn,

                count(*) over (partition by category_id)        cn,

                count(*) over (partition by category_id) % 2   flag

         from sku_info

     ) t1

where flag = 0 and (rn = cn / 2 or rn = cn / 2 + 1)

union

select category_id, price

from (         select sku_id, category_id,  price,

                row_number() over (partition by category_id order by price desc) rn,

                count(*) over (partition by category_id)         cn,

                count(*) over (partition by category_id) % 2         flag

         from sku_info

     ) t1

where flag = 1  and rn = round(cn / 2);

27. 找出销售额连续3天超过100的商品

题目需求:从订单详情表(order_detail)中找出销售额连续3天超过100的商品

结果如下:

Sku_id(商品id)

1

10

分析

(1)统计商品日销售额>100

Drop view if exists v_1;

     Create view v_1 as select sku_id,create_date, sum(sku_num*price) day_sum  from order_detail  group by sku_id,create_date  having(sum(sku_num*price))>100;

(2)确定连续天数

   Drop view if exists v_2;

     Create view v_2 as  select sku_id,create_date, day_sum,

                Date_sub(create_date, Rank() over(partition by sku_id order by create_date)) rk

                From v_1;

(3)查询

  Select sku_id,  from v_2  group by sku_id, rk having count(*)>=3;

28 查询有新注册用户的当天的新用户数量、新用户的第一天留存率

 题目需求:从用户登录明细表(user_login_detail)中首次登录算作当天新增,第二天也登录了算作一日留存

结果如下:

 分析:

  1. 每天登录客户 去重

  Drop view if exists v_1;

  Create view v_1 as select user_id,date_format(login_ts,'yyyy-MM-dd') login_date

from user_login_detail  group by user_id,date_format(login_ts,'yyyy-MM-dd')

(2) 取下一条记录的登录日期lead_date,并排序 rk

 Drop view if exists v_2;

  Create view v_2 as      

           SELECT user_id,login_date,

            lead(login_date)over(partition by user_id order by login_date) lead_date,

            rank()over(partition by user_id order by login_date) rk

From  v_1

 lead(login_date) over(partition by user_id order by login_date)或

 lead(login_date,1) over(partition by user_id order by login_date)

  取下一条记录字段login_date

  1. 统计首次注册人数 registe

SELECT login_date ,

       count(*) register,

       cast(sum(if(datediff(lead_date,login_date)=1,1,0))/count(*) as decimal(16,2)) retention

From v_2  where rk=1  group by login_date

注意:where rk=1  首次注册

if(datediff(lead_date,login_date)=1,1,0)   lead_date,login_date日期相减=1,说明前后天连续登录用户

 If()=1 否则=0

  Sum () 求连续登录用户之和           

代码:

SELECT login_date

       count(*) register,

       cast(sum(if(datediff(lead_date,login_date)=1,1,0))/count(*) as decimal(16,2)) retention

from(  SELECT user_id,login_date,

       lead(login_date)over(partition by user_id order by login_date) lead_date,

       rank()over(partition by user_id order by login_date) rk

from( select user_id,date_format(login_ts,'yyyy-MM-dd') login_date

from user_login_detail  group by user_id,date_format(login_ts,'yyyy-MM-dd')

  )t1

  )t2

where rk=1   group by login_date

29 求出商品连续售卖的时间区间

题目需求:从订单详情表(order_detail)中,求出商品连续售卖的时间区间

结果如下(截取部分):

Sku_id(商品id)Start_date(起始时间)End_date(结束时间)

1    2021-09-27            2021-09-27

1     2021-09-30           2021-10-01

1     2021-10-03           2021-10-08

10    2021-10-02           2021-10-03

10    2021-10-05           2021-10-08

11    2021-10-02          2021-10-08

12    2021-09-30          2021-09-30

12    2021-10-02          2021-10-06

分析

  1. 找出dd2

按user_id分组,组内按create_date排序kr

 dd2=create-rk   组内如果dd2相等,则是日期是连续的。

   Drop view if exists v_1;

    Create view v_1  as select

sku_id, create_date,dense_rank()over(partition by sku_id order by create_date) rk,

(date_sub(create_date , dense_rank()over(partition by sku_id order by create_date))) as dd2

from order_detail

 

  1. 根据dd2由于此分组日期连接,求每组最大时间最小时间

    Select sku_id,

min(create_date)  as start_date,

max(create_date)  as end_date

from  v_1 group by sku_id,dd2

代码

select

A.sku_id,

min(A.create_date)  as start_date,

max(A.create_date)  as end_date

from

(select

sku_id, create_date,

(date_sub(create_date , dense_rank()over(partition by sku_id order by create_date))) as dd2

from order_detail )A

group by A.sku_id,A.dd2

30. 登录次数及交易次数统计

题目需求:分别从登录明细表(user_login_detail)和配送信息表(delivery_info)中,根据用户登录时间和下单时间统计每个用户在每一天的登录次数和交易次数。

 期望结果如下:

需要用到的表:

用户登录明细表:user_login_detail

配送信息表:delivery_info

分析

1统计每个 user_id 在 每一天的登录次数

  Drop view if exists v_1;

Create view v_1 as  select user_id, date_format(login_ts, 'yyyy-MM-dd') as login_date, count(*)login_count

         from user_login_detail

         group by user_id, date_format(login_ts, 'yyyy-MM-dd');

2) 统计每个 user_id 在每一天的交易(下单)次数

   Drop view if exists v_2;

Create view v_2 as   select user_id,  order_date,  count(*) as order_count

                      from delivery_info   group by user_id, order_date;

(3)两个表连接得到结果

  select t1.user_id   as user_id,t1.login_date login_date, t1.login_count  login_count,

       nvl(t2.order_count, 0) as order_count

From v_1 t1

Join v_2 t2   on t1.user_id = t2.user_id and t1.login_date = t2.order_date;

代码:

select t1.user_id             as user_id,

       t1.login_date          as login_date,

       t1.login_count         as login_count,

       nvl(t2.order_count, 0) as order_count

from (

         -- 1) 统计每个 user_id 在 每一天的 登录次数

         select user_id,

                date_format(login_ts, 'yyyy-MM-dd') as login_date,

                count(*)      as login_count

         from user_login_detail

         group by user_id, date_format(login_ts, 'yyyy-MM-dd')

     ) t1

         left join

     (

         -- 2) 统计每个 user_id 在 每一天的 交易(下单)次数

         select user_id,

                order_date,

                count(*) as order_count

         from delivery_info

         group by user_id, order_date

     ) t2

     on t1.user_id = t2.user_id and t1.login_date = t2.order_date;

31.按年度列出每个商品销售总额

题目需求:从订单明细表(order_detail)中列出每个商品每个年度的购买总额

分析

订单明细表:order_detail

从订单明细表(order_detail)中列出每个商品每个年度的购买总额

结果如下(截取部分):

select  sku_id,  year(create_date) year_date,  sum(price*sku_num) sku_sum

from  order_detail   group by   sku_id,year(create_date)

32. 某周内每件商品每天销售情况

 题目需求:从订单详情表(order_detail)中查询2021年9月27号-2021年10月3号这一周所有商品每天销售情况。

需要用到的表:

订单明细表:order_detail

结果如下:

select  sku_id,

  sum(if(dayofweek(create_date)=2,sku_num,0)) Monday,

  sum(if(dayofweek(create_date)=3,sku_num,0)) Tuesday,

  sum(if(dayofweek(create_date)=4,sku_num,0)) Wednesday,

  sum(if(dayofweek(create_date)=5,sku_num,0)) Thursday,

  sum(if(dayofweek(create_date)=6,sku_num,0)) Friday,

  sum(if(dayofweek(create_date)=7,sku_num,0)) Saturday,

  sum(if(dayofweek(create_date)=1,sku_num,0)) Sunday

from  order_detail   where

  create_date>='2021-09-27' and create_date<='2021-10-03'

group by   sku_id

注意: dayof week(create_date) 取日期周次 (1-7)

1  周日

2   周1

3   周2

4   周3

7   周六

33. 查看每件商品的售价涨幅情况

题目需求:从商品价格变更明细表(sku_price_modify_detail),得到最近一次价格的涨幅情况,并按照涨幅升序排序。

商品价格变更明细表:sku_price_modify_detail

 对每个商品按照修改日期倒序排序 并求出差值

Drop view if exists v_1;

Create view v_1 as select

  sku_id,  new_price-lead(new_price,1,0)  over(partition by sku_id order by change_date desc) price_change,

  rank()  over(partition by sku_id order by change_date desc) rk

From   sku_price_modify_detail

-- 最近一次修改的价格

Select  sku_id, price_change

From v_1   where   rk=1   order by price_change  

34. 销售订单首购和次购分析

 题目需求:通过商品信息表(sku_info)订单信息表(order_info)订单明细表(order_detail)分析如果有一个用户成功下单两个及两个以上的购买成功的手机订单(购买商品为xiaomi 10,apple 12,小米13)那么输出这个用户的id及第一次成功购买手机的日期和第二次成功购买手机的日期,以及购买手机成功的次数。

结果如下:

分析:用户购买二种或三种以上品牌产品 ,输出第一次和第二次购买时间,网上看到答案是输出最一次和最后一次时间。我以为有两个问题

  1. 用户购买两次或以上产品,网上没作判断,对于本题数不影响结果,因为所有用户都购买了两个或以上产品。
  2. 题目要求显示第一次和第二次购买时间,而网上给出的是第一次和最后一次时间。

需要用到的表

订单信息表:order_info

订单明细表:order_detail

商品信息表:sku_info

  1. 找出用户下单指定三种品牌记录

Drop view if exists v_1;

Create view v_1 as select user_id,od.create_date,name from order_detail od

Left join  order_info oi   on od.order_id=oi.order_id

Left join sku_info si   on  od.sku_id=si.sku_id

Where name in('xiaomi 13','xiaomi 10','apple 12');

  1. 找出两个或全以上商品用户

Drop view if exists v_2;

Create view v_2 as select user_id,create_date,name ,cnt from (select * ,count(*) over(partition by user_id)cnt  from v_1)t where cnt>=2;

  1. 按用户分组按create_date排序

  Drop view if exists v_3;

Create view v_3 as SELECT  *,row_number() over(partition by user_id order by create_date )rk from v_2 ;

 

 Select user_id, min(if( rk=1,create_date,null))first_date, min(if( rk=2,create_date,null))secend_date cnt from v_3 where rk<=2 group by user_id;

  附网上代码:

  WITH tmp as(  SELECT user_id,    od.create_date,name

      from order_detail od

      LEFT JOIN sku_info si      on od.sku_id = si.sku_id

      LEFT JOIN order_info oi      on od.order_id = oi.order_id

     where name in ('xiaomi 13','xiaomi 10','apple 12')

)

SELECT   user_id,first_date,last_date,   count(1) cn

from(   SELECT     user_id,

          max(create_date) over(PARTITION by user_id) last_date,

          min(create_date) over(PARTITION by user_id) first_date

      from tmp

)t1 group by user_id,first_date,last_date

35.同期商品售卖分析表

题目需求 从订单明细表(order_detail)中。求出同一个商品在2020年和2021年中同一个月的售卖情况对比。

题意分析:将每个商品2021、2022每一月销售量情况反映如下

         

  1. 求商品月销售量

Drop view if exists v_1;

Create view v_1  as  SELECT sku_id, MONTH(create_date) m, YEAR(create_date) y, sku_num from order_detail )

Select sku_id,y,m,sku_num from v_1  order by  sku_id,y,m;

sku_id   y     m   sku_num

1     2020 10 2

1 2021 9 9

1 2021 9 2

1 2021 10 8

1 2021 10 4

1 2021 10 8

1 2021 10 3

1 2021 10 8

1 2021 10 5

1 2021 10 2

10 2020 10 37

10 2020 10 57

10 2021 10 37

10 2021 10 48

10 2021 10 47

10 2021 10 22

10 2021 10 24

10 2021 10 27

SELECT sku_id,  m month,  sum(IF(y=2020,sku_num,0)) 2020_skusum,

                          sum(IF(y=2021,sku_num,0)) 2021_skusum

from v_1    group by sku_id,m

sku_idmonth 2020_skusum  2021_skusum

36 国庆期间每个品类的商品的收藏量和购买量

题目需求:从订单明细表(order_detail)和收藏信息表(favor_info)统计2021国庆期间,每个商品总收藏量和购买量

分析:

1)国庆期间每个品类的商品的收藏量

    Drop view if exists v_1;     

Create view v_1 as select sku_id, count(*) as favor_cn_tmp  from favor_info where '2021-10-01' <= create_date and create_date <= '2021-10-07'  group by sku_id  ;

  1.   国庆期间每个品类的商品的购买量

       Drop view if exists v_2;     

Create view v_2  as   select sku_id,  sum(sku_num) as sku_sum_tmp  from order_detail

       where '2021-10-01' <= create_date  and create_date <= '2021-10-07'   group by sku_id;

37 统计活跃间隔对用户分级结果

题目需求  用户等级:忠实用户:近7天活跃且非新用户

新晋用户:近7天新增

沉睡用户:近7天未活跃但是在7天前活跃

流失用户:近30天未活跃但是在30天前活跃

假设今天是数据中所有日期的最大值,从用户登录明细表中的用户登录时间给各用户分级,求出各等级用户的人数

分析

(1)

  Drop view  if  exists  v_1;

  Create view v_1 as SELECT  user_id,

        max(date_format(logout_ts,"yyyy-MM-dd")) over() today, --全局窗口

        --按用户id分组的窗口,求出最后一次登录日期

        max(date_format(login_ts,"yyyy-MM-dd")) over(PARTITION by user_id) last_login,

        --按用户id分组的窗口,求出第一次登录日期

        min(date_format(login_ts,"yyyy-MM-dd")) over(PARTITION by user_id) first_login

    from user_login_detail

(2)

 Drop view  if  exists  v_2;

  Create view v_2 as  SELECT

        user_id,

        case when first_login < date_sub(today,7) AND last_login >= date_sub(today,7) then '忠实用户'

                 when first_login >= date_sub(today,7) then '新增用户'

                 when last_login < date_sub(today,7) then '沉睡用户'

                 when last_login < date_sub(today,30) then '流失用户'

                 END   level

from  v_1;

查询汉字出现???,如何解决?

38.连续签到领金币数

题目需求 用户每天签到可以领1金币,并可以累计签到天数,连续签到的第3、7天分别可以额外领2和6金币。

每连续签到7天重新累积签到天数。

从用户登录明细表中求出每个用户金币总数,并按照金币总数倒序排序

表user_login_detail

 (1)字段login_ts格式转换“yyyy-MM-dd”,且去除不需要的字段

  Drop view if exists v_1;

 Create view v_1 as select user_id, date_format(login_ts, "yyyy-MM-dd") ts

from user_login_detail

  1. 统计用户的连续登录区间

 Drop view if exists v_2;

 Create view v_2 as select user_id,

       ts,

       row_number() over (partition by user_id order by ts)  rn,

       date_sub(ts, row_number() over (partition by user_id order by ts)) flag

From v_1

  1. 按照用户的id和flag进行分组,每个用户的不同登录区间找出来

ts表示登录的日期,rn是用了开窗排序,按照登录日期来排序,如果flag字段相等的话,就说明在这段时间是连续登录的。

表中101用户,登录区间是断开的,再次用row_number开个窗,按照用户的id和flag进行分组我们要怎么把1,2,3,4,5拆成1,2,3,4和1。

 Drop view if exists v_3;

 Create view v_3 as  select user_id,ts,

row_number() over (partition by user_id, flag order by ts) cnt

from  v_2;

   

  1. 对第三天登录和第七天登录的金币作标记3和7,其余的天数登录都是1。

 Drop view if exists v_4;

 Create view v_4 as select user_id,

       case

           when (cnt - 3) = 0  or (cnt - 3) % 7 = 0 then 3

           when (cnt % 7) = 0 then 7

           else 1 end glods

From v_3;

   Select * from v_4;

     

  1. 简单的分id做sum聚合

     select user_id, sum(glods) num  from v_4 group by user_id;

         

39.国庆期间的7日动销率和滞销率

题目需求:动销率定义为品类商品中一段时间内有销量的商品占当前已上架总商品数比例(有销量的商品/已上架总商品数)。

滞销率定义为品类商品中一段时间内没有销量的商品占当前已上架总商品数的比例。(没有销量的商品/ 已上架总商品数)。

只要当天任一店铺有任何商品的销量就输出该天的结果

从订单明细表(order_detail)确定定单时间(create_date)

商品信息表(sku_info)确定上架品类(category_id)品名name

  1. 生成字段为category_id,create_date,name国庆记录

Drop view if exists v_1;

 Create view v_1  as  select  distinct  si.category_id, od.create_date, si.name  from  order_detail od

          join  sku_info si  on  od.sku_id=si.sku_id

          where  od.create_date>='2021-10-01' and od.create_date<='2021-10-07'

  1. 统计每天品类数量

 Drop view if exists v_2;

 Create view v_2  as select

     t1.category_id,

     sum(if(t1.create_date='2021-10-01',1,0))  day_1,

     sum(if(t1.create_date='2021-10-02',1,0))  day-2,

     sum(if(t1.create_date='2021-10-03',1,0))  day-3,

     sum(if(t1.create_date='2021-10-04',1,0))  day-4,

     sum(if(t1.create_date='2021-10-05',1,0))  day-5,

     sum(if(t1.create_date='2021-10-06',1,0))  day-6,

     sum(if(t1.create_date='2021-10-07',1,0))  day-7 

  From v_1 t1   group by  t1.category_id;

(3)

 -- 每一天的动销率 和 滞销率

select

  t2.category_id,

  t2.day_1/t3.cn,  1-t2.day_1/t3.cn,

  t2.day_2/t3.cn,  1-t2.day_2/t3.cn,

  t2.day_3/t3.cn,  1-t2.day_3/t3.cn,

  t2.day_4/t3.cn,  1-t2.day_4/t3.cn,

  t2.day_5/t3.cn,  1-t2.day_5/t3.cn,

  t2.day_6/t3.cn,  1-t2.day_6/t3.cn,

  t2.day_7/t3.cn,  1-t2.day_7/t3.cn

From  v_2  t2

join  ( select  category_id, count(*) cn  from  sku_info  group by  category_id )t3

on   t2.category_id=t3.category_id

注意:  select  category_id, count(*) cn  from  sku_info  group by  category_id 统计上架整个品类数量

代码:

-- 每一天的动销率 和 滞销率

select

  t2.category_id,

  t2.day_1/t3.cn,  1-t2.day_1/t3.cn,

  t2.day_2/t3.cn,  1-t2.day_2/t3.cn,

  t2.day_3/t3.cn,  1-t2.day_3/t3.cn,

  t2.day_4/t3.cn,  1-t2.day_4/t3.cn,

  t2.day_5/t3.cn,  1-t2.day_5/t3.cn,

  t2.day_6/t3.cn,  1-t2.day_6/t3.cn,

  t2.day_7/t3.cn,  1-t2.day_7/t3.cn

from

  (  select

     t1.category_id,

     sum(if(t1.create_date='2021-10-01',1,0)) day_1,

     sum(if(t1.create_date='2021-10-02',1,0)) day_2,

     sum(if(t1.create_date='2021-10-03',1,0)) day_3,

     sum(if(t1.create_date='2021-10-04',1,0)) day_4,

     sum(if(t1.create_date='2021-10-05',1,0)) day_5,

     sum(if(t1.create_date='2021-10-06',1,0)) day_6,

     sum(if(t1.create_date='2021-10-07',1,0)) day_7

   from ( select  distinct  si.category_id, od.create_date, si.name  from  order_detail od

          join  sku_info si  on  od.sku_id=si.sku_id

          where  od.create_date>='2021-10-01' and od.create_date<='2021-10-07' )t1

       group by  t1.category_id

   )t2

join  ( select  category_id, count(*) cn  from  sku_info  group by  category_id )t3

on  t2.category_id=t3.category_id

40 同时在线最多的人数

 题目需求:根据用户登录明细表(user_login_detail),求出平台同时在线最多的人数。

分析:

(1)确定用户在线还是离线,tag=1,在线 tag =-1 离线

Drop view if exists v_1;

Create view v_1 as SELECT

      login_ts ts, 1 tag   FROM  user_login_detail

    UNION ALL

SELECT    logout_ts ts,   -1 tag    FROM    user_login_detail

注意: (1)两个数据集合并 需要相同字段

       (2) union 合并并去重,union all 合并不去重

       

  1. 计算tag和。

表中一个用户要么 tag=1 (上线)

            要么 tag=1,tag=-1(先上线,后下线)

所以统计 tag和,就可以确定线上人数。  

Drop view if exists v_2;

Create view v_2 as select sum(tag) over(order by ts) cn

FROM v_1;

注意:sum(tag) over(order by ts) cn

      求和范围是从开始到日前时刻ts时tag和。

注意:求和范围

 ts          tag  cn

2021-09-21 08:00:00 1      1

2021-09-22 09:00:00 1      2

2021-09-23 10:00:00 1      3

2021-09-24 11:00:00 1      4

2021-09-25 12:00:00 1      5

2021-09-26 13:00:00 1      6

2021-09-27 08:00:00 1      7

2021-09-27 08:30:00 -1     5

2021-09-27 08:30:00 -1     5

2021-09-27 09:30:00 -1     4

2021-09-27 10:30:00 -1     3

2021-09-27 11:30:00 -1     2

2021-09-27 12:30:00 -1     1

2021-09-27 13:30:00 -1     0

2021-09-27 14:00:00 1      1

2021-09-27 14:30:00 -1     0

2021-09-28 09:00:00 1     

2021-09-28 09:10:00 -1

2021-09-29 13:30:00 1

2021-09-29 13:50:00 -1

2021-09-30 20:00:00 1

2021-09-30 20:10:00 -1

2021-10-01 07:50:00 1

2021-10-01 08:00:00 1

2021-10-01 08:20:00 -1

2021-10-01 08:30:00 -1

2021-10-02 08:00:00 1

2021-10-02 08:30:00 -1

2021-10-03 07:50:00 1

2021-10-03 07:50:00 1

2021-10-03 08:20:00 -1

2021-10-03 08:40:00 1

2021-10-03 08:50:00 1

2021-10-03 09:20:00 -1

2021-10-03 10:20:00 -1

2021-10-03 10:30:00 -1

2021-10-04 08:40:00 1

2021-10-04 09:10:00 1

2021-10-04 09:30:00 -1

2021-10-04 10:30:00 -1

2021-10-05 21:50:00 1

2021-10-05 22:00:00 1

2021-10-05 22:40:00 -1

2021-10-05 23:00:00 -1

2021-10-06 08:50:00 1

2021-10-06 09:00:00 1

2021-10-06 09:00:00 1

2021-10-06 09:10:00 1

2021-10-06 10:00:00 -1

2021-10-06 10:00:00 -1

2021-10-06 10:20:00 -1

2021-10-06 10:20:00 -1

2021-10-06 22:00:00 1

2021-10-06 23:00:00 -1

2021-10-08 09:00:00 1

2021-10-08 09:10:00 -1

2021-10-09 08:50:00 1

2021-10-09 10:20:00 -1

(3) 统计线最大线上人数

Select  max(cn) cn from v_2;

代码:

-- 登录标记1 下线标记-1

select

  login_ts l_time,

  1 flag

from

  user_login_detail

union

select

  logout_ts l_time,

  -1 flag

from

  user_login_detail

-- 按照时间求和

select

  sum(flag)over(order by t1.l_time) sum_l_time

from

  (

    select

      login_ts l_time,

      1 flag

    from

      user_login_detail

    union

    select

      logout_ts l_time,

      -1 flag

    from

      user_login_detail

)t1  

-- 拿到最大值 就是同时在线最多人数

select

  max(sum_l_time)

from

  (

    select

      sum(flag)over(order by t1.l_time) sum_l_time

    from

      (

        select

          login_ts l_time,

          1 flag

        from

          user_login_detail

        union

        select

          logout_ts l_time,

          -1 flag

        from

          user_login_detail

    )t1

)t2

  • 实验数据准备

注意实验时可能会出现Hive 查询视图时遇到中文乱码???问题解决方法

进入mysql,输入以下两条命令:

ALTER TABLE `TBLS` MODIFY COLUMN VIEW_EXPANDED_TEXT mediumtext CHARACTER SET utf8;

ALTER TABLE `TBLS` MODIFY COLUMN VIEW_ORIGINAL_TEXT mediumtext CHARACTER SET utf8;

 创建表并加载数据

--1.用户信息表

DROP TABLE IF EXISTS user_info;

CREATE table user_info(

    user_id string comment'用户id',

    gender string comment'性别',

    birthday string comment'生日'

)comment'用户信息表'

row format delimited fields terminated by '\t';

--数据装载

insert overwrite table user_info

values ('101', '男', '1990-01-01'),

       ('102', '女', '1991-02-01'),

       ('103', '女', '1992-03-01'),

       ('104', '男', '1993-04-01'),

       ('105', '女', '1994-05-01'),

       ('106', '男', '1995-06-01'),

       ('107', '女', '1996-07-01'),

       ('108', '男', '1997-08-01'),

       ('109', '女', '1998-09-01'),

       ('1010', '男', '1999-10-01');

--2. 商品信息表

DROP TABLE IF EXISTS sku_info;

CREATE TABLE sku_info(

    `sku_id`      string COMMENT '商品id',

    `name`        string COMMENT '商品名称',

    `category_id` string COMMENT '所属分类id',

    `from_date`   string COMMENT '上架日期',

    `price`       double COMMENT '商品单价'

) COMMENT '商品属性表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

hive>

insert overwrite table sku_info

values ('1', 'xiaomi 10', '1', '2020-01-01', 2000),

       ('2', '手机壳', '1', '2020-02-01', 10),

       ('3', 'apple 12', '1', '2020-03-01', 5000),

       ('4', 'xiaomi 13', '1', '2020-04-01', 6000),

       ('5', '破壁机', '2', '2020-01-01', 500),

       ('6', '洗碗机', '2', '2020-02-01', 2000),

       ('7', '热水壶', '2', '2020-03-01', 100),

       ('8', '微波炉', '2', '2020-04-01', 600),

       ('9', '自行车', '3', '2020-01-01', 1000),

       ('10', '帐篷', '3', '2020-02-01', 100),

       ('11', '烧烤架', '3', '2020-02-01', 50),

       ('12', '遮阳伞', '3', '2020-03-01', 20);

--3.商品分类信息表

DROP TABLE IF EXISTS category_info;

create table category_info(

    `category_id`   string comment '分类id',

    `category_name`  string comment '分类名称'

) COMMENT  '品类表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table category_info

values ('1','数码'),

       ('2','厨卫'),

       ('3','户外');

--4.订单信息表

DROP TABLE IF EXISTS order_info;

create table order_info(

    `order_id`     string COMMENT '订单id',

    `user_id`      string COMMENT '用户id',

    `create_date`  string COMMENT '下单日期',

    `total_amount` decimal(16, 2) COMMENT '订单总金额'

) COMMENT '订单表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table order_info

values ('1', '101', '2021-09-27', 29000.00),

       ('2', '101', '2021-09-28', 70500.00),

       ('3', '101', '2021-09-29', 43300.00),

       ('4', '101', '2021-09-30', 860.00),

       ('5', '102', '2021-10-01', 46180.00),

       ('6', '102', '2021-10-01', 50000.00),

       ('7', '102', '2021-10-01', 75500.00),

       ('8', '102', '2021-10-02', 6170.00),

       ('9', '103', '2021-10-02', 18580.00),

       ('10', '103', '2021-10-02', 28000.00),

       ('11', '103', '2021-10-02', 23400.00),

       ('12', '103', '2021-10-03', 5910.00),

       ('13', '104', '2021-10-03', 13000.00),

       ('14', '104', '2021-10-03', 69500.00),

       ('15', '104', '2021-10-03', 2000.00),

       ('16', '104', '2021-10-03', 5380.00),

       ('17', '105', '2021-10-04', 6210.00),

       ('18', '105', '2021-10-04', 68000.00),

       ('19', '105', '2021-10-04', 43100.00),

       ('20', '105', '2021-10-04', 2790.00),

       ('21', '106', '2021-10-04', 9390.00),

       ('22', '106', '2021-10-05', 58000.00),

       ('23', '106', '2021-10-05', 46600.00),

       ('24', '106', '2021-10-05', 5160.00),

       ('25', '107', '2021-10-05', 55350.00),

       ('26', '107', '2021-10-05', 14500.00),

       ('27', '107', '2021-10-06', 47400.00),

       ('28', '107', '2021-10-06', 6900.00),

       ('29', '108', '2021-10-06', 56570.00),

       ('30', '108', '2021-10-06', 44500.00),

       ('31', '108', '2021-10-07', 50800.00),

       ('32', '108', '2021-10-07', 3900.00),

       ('33', '109', '2021-10-07', 41480.00),

       ('34', '109', '2021-10-07', 88000.00),

       ('35', '109', '2020-10-08', 15000.00),

       ('36', '109', '2020-10-08', 9020.00),

       ('37', '1010', '2020-10-08', 9260.00),

       ('38', '1010', '2020-10-08', 12000.00),

       ('39', '1010', '2020-10-08', 23900.00),

       ('40', '1010', '2020-10-08', 6790.00);

--5.订单明细表

DROP TABLE IF EXISTS order_detail;

CREATE TABLE order_detail

(

    `order_detail_id` string COMMENT '订单明细id',

    `order_id`        string COMMENT '订单id',

    `sku_id`          string COMMENT '商品id',

    `create_date`     string COMMENT '下单日期',

    `price`           decimal(16, 2) COMMENT '下单时的商品单价',

    `sku_num`         int COMMENT '下单商品件数'

) COMMENT '订单明细表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

INSERT overwrite table order_detail

values ('1', '1', '1', '2021-09-27', 2000.00, 2),

       ('2', '1', '3', '2021-09-27', 5000.00, 5),

       ('3', '2', '4', '2021-09-28', 6000.00, 9),

       ('4', '2', '5', '2021-09-28', 500.00, 33),

       ('5', '3', '7', '2021-09-29', 100.00, 37),

       ('6', '3', '8', '2021-09-29', 600.00, 46),

       ('7', '3', '9', '2021-09-29', 1000.00, 12),

       ('8', '4', '12', '2021-09-30', 20.00, 43),

       ('9', '5', '1', '2021-10-01', 2000.00, 8),

       ('10', '5', '2', '2021-10-01', 10.00, 18),

       ('11', '5', '3', '2021-10-01', 5000.00, 6),

       ('12', '6', '4', '2021-10-01', 6000.00, 8),

       ('13', '6', '6', '2021-10-01', 2000.00, 1),

       ('14', '7', '7', '2021-10-01', 100.00, 17),

       ('15', '7', '8', '2021-10-01', 600.00, 48),

       ('16', '7', '9', '2021-10-01', 1000.00, 45),

       ('17', '8', '10', '2021-10-02', 100.00, 48),

       ('18', '8', '11', '2021-10-02', 50.00, 15),

       ('19', '8', '12', '2021-10-02', 20.00, 31),

       ('20', '9', '1', '2021-09-30', 2000.00, 9),

       ('21', '9', '2', '2021-10-02', 10.00, 5800),

       ('22', '10', '4', '2021-10-02', 6000.00, 1),

       ('23', '10', '5', '2021-10-02', 500.00, 24),

       ('24', '10', '6', '2021-10-02', 2000.00, 5),

       ('25', '11', '8', '2021-10-02', 600.00, 39),

       ('26', '12', '10', '2021-10-03', 100.00, 47),

       ('27', '12', '11', '2021-10-03', 50.00, 19),

       ('28', '12', '12', '2021-10-03', 20.00, 13000),

       ('29', '13', '1', '2021-10-03', 2000.00, 4),

       ('30', '13', '3', '2021-10-03', 5000.00, 1),

       ('31', '14', '4', '2021-10-03', 6000.00, 5),

       ('32', '14', '5', '2021-10-03', 500.00, 47),

       ('33', '14', '6', '2021-10-03', 2000.00, 8),

       ('34', '15', '7', '2021-10-03', 100.00, 20),

       ('35', '16', '10', '2021-10-03', 100.00, 22),

       ('36', '16', '11', '2021-10-03', 50.00, 42),

       ('37', '16', '12', '2021-10-03', 20.00, 7400),

       ('38', '17', '1', '2021-10-04', 2000.00, 3),

       ('39', '17', '2', '2021-10-04', 10.00, 21),

       ('40', '18', '4', '2021-10-04', 6000.00, 8),

       ('41', '18', '5', '2021-10-04', 500.00, 28),

       ('42', '18', '6', '2021-10-04', 2000.00, 3),

       ('43', '19', '7', '2021-10-04', 100.00, 55),

       ('44', '19', '8', '2021-10-04', 600.00, 11),

       ('45', '19', '9', '2021-10-04', 1000.00, 31),

       ('46', '20', '11', '2021-10-04', 50.00, 45),

       ('47', '20', '12', '2021-10-04', 20.00, 27),

       ('48', '21', '1', '2021-10-04', 2000.00, 2),

       ('49', '21', '2', '2021-10-04', 10.00, 39),

       ('50', '21', '3', '2021-10-04', 5000.00, 1),

       ('51', '22', '4', '2021-10-05', 6000.00, 8),

       ('52', '22', '5', '2021-10-05', 500.00, 20),

       ('53', '23', '7', '2021-10-05', 100.00, 58),

       ('54', '23', '8', '2021-10-05', 600.00, 18),

       ('55', '23', '9', '2021-10-05', 1000.00, 30),

       ('56', '24', '10', '2021-10-05', 100.00, 27),

       ('57', '24', '11', '2021-10-05', 50.00, 28),

       ('58', '24', '12', '2021-10-05', 20.00, 53),

       ('59', '25', '1', '2021-10-05', 2000.00, 5),

       ('60', '25', '2', '2021-10-05', 10.00, 35),

       ('61', '25', '3', '2021-10-05', 5000.00, 9),

       ('62', '26', '4', '2021-10-05', 6000.00, 1),

       ('63', '26', '5', '2021-10-05', 500.00, 13),

       ('64', '26', '6', '2021-10-05', 2000.00, 1),

       ('65', '27', '7', '2021-10-06', 100.00, 30),

       ('66', '27', '8', '2021-10-06', 600.00, 19),

       ('67', '27', '9', '2021-10-06', 1000.00, 33),

       ('68', '28', '10', '2021-10-06', 100.00, 37),

       ('69', '28', '11', '2021-10-06', 50.00, 46),

       ('70', '28', '12', '2021-10-06', 20.00, 45),

       ('71', '29', '1', '2021-10-06', 2000.00, 8),

       ('72', '29', '2', '2021-10-06', 10.00, 57),

       ('73', '29', '3', '2021-10-06', 5000.00, 8),

       ('74', '30', '4', '2021-10-06', 6000.00, 3),

       ('75', '30', '5', '2021-10-06', 500.00, 33),

       ('76', '30', '6', '2021-10-06', 2000.00, 5),

       ('77', '31', '8', '2021-10-07', 600.00, 13),

       ('78', '31', '9', '2021-10-07', 1000.00, 43),

       ('79', '32', '10', '2021-10-07', 100.00, 24),

       ('80', '32', '11', '2021-10-07', 50.00, 30),

       ('81', '33', '1', '2021-10-07', 2000.00, 8),

       ('82', '33', '2', '2021-10-07', 10.00, 48),

       ('83', '33', '3', '2021-10-07', 5000.00, 5),

       ('84', '34', '4', '2021-10-07', 6000.00, 10),

       ('85', '34', '5', '2021-10-07', 500.00, 44),

       ('86', '34', '6', '2021-10-07', 2000.00, 3),

       ('87', '35', '8', '2020-10-08', 600.00, 25),

       ('88', '36', '10', '2020-10-08', 100.00, 57),

       ('89', '36', '11', '2020-10-08', 50.00, 44),

       ('90', '36', '12', '2020-10-08', 20.00, 56),

       ('91', '37', '1', '2020-10-08', 2000.00, 2),

       ('92', '37', '2', '2020-10-08', 10.00, 26),

       ('93', '37', '3', '2020-10-08', 5000.00, 1),

       ('94', '38', '6', '2020-10-08', 2000.00, 6),

       ('95', '39', '7', '2020-10-08', 100.00, 35),

       ('96', '39', '8', '2020-10-08', 600.00, 34),

       ('97', '40', '10', '2020-10-08', 100.00, 37),

       ('98', '40', '11', '2020-10-08', 50.00, 51),

       ('99', '40', '12', '2020-10-08', 20.00, 27);

--6.登录明细表

DROP TABLE IF EXISTS user_login_detail;

CREATE TABLE user_login_detail

(

    `user_id`    string comment '用户id',

    `ip_address` string comment 'ip地址',

    `login_ts`   string comment '登录时间',

    `logout_ts`  string comment '登出时间'

) COMMENT '用户登录明细表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

INSERT overwrite table user_login_detail

VALUES ('101', '180.149.130.161', '2021-09-21 08:00:00', '2021-09-27 08:30:00'),

       ('101', '180.149.130.161', '2021-09-27 08:00:00', '2021-09-27 08:30:00'),

       ('101', '180.149.130.161', '2021-09-28 09:00:00', '2021-09-28 09:10:00'),

       ('101', '180.149.130.161', '2021-09-29 13:30:00', '2021-09-29 13:50:00'),

       ('101', '180.149.130.161', '2021-09-30 20:00:00', '2021-09-30 20:10:00'),

       ('102', '120.245.11.2', '2021-09-22 09:00:00', '2021-09-27 09:30:00'),

       ('102', '120.245.11.2', '2021-10-01 08:00:00', '2021-10-01 08:30:00'),

       ('102', '180.149.130.174', '2021-10-01 07:50:00', '2021-10-01 08:20:00'),

       ('102', '120.245.11.2', '2021-10-02 08:00:00', '2021-10-02 08:30:00'),

       ('103', '27.184.97.3', '2021-09-23 10:00:00', '2021-09-27 10:30:00'),

       ('103', '27.184.97.3', '2021-10-03 07:50:00', '2021-10-03 09:20:00'),

       ('104', '27.184.97.34', '2021-09-24 11:00:00', '2021-09-27 11:30:00'),

       ('104', '27.184.97.34', '2021-10-03 07:50:00', '2021-10-03 08:20:00'),

       ('104', '27.184.97.34', '2021-10-03 08:50:00', '2021-10-03 10:20:00'),

       ('104', '120.245.11.89', '2021-10-03 08:40:00', '2021-10-03 10:30:00'),

       ('105', '119.180.192.212', '2021-10-04 09:10:00', '2021-10-04 09:30:00'),

       ('106', '119.180.192.66', '2021-10-04 08:40:00', '2021-10-04 10:30:00'),

       ('106', '119.180.192.66', '2021-10-05 21:50:00', '2021-10-05 22:40:00'),

       ('107', '219.134.104.7', '2021-09-25 12:00:00', '2021-09-27 12:30:00'),

       ('107', '219.134.104.7', '2021-10-05 22:00:00', '2021-10-05 23:00:00'),

       ('107', '219.134.104.7', '2021-10-06 09:10:00', '2021-10-06 10:20:00'),

       ('107', '27.184.97.46', '2021-10-06 09:00:00', '2021-10-06 10:00:00'),

       ('108', '101.227.131.22', '2021-10-06 09:00:00', '2021-10-06 10:00:00'),

       ('108', '101.227.131.22', '2021-10-06 22:00:00', '2021-10-06 23:00:00'),

       ('109', '101.227.131.29', '2021-09-26 13:00:00', '2021-09-27 13:30:00'),

       ('109', '101.227.131.29', '2021-10-06 08:50:00', '2021-10-06 10:20:00'),

       ('109', '101.227.131.29', '2021-10-08 09:00:00', '2021-10-08 09:10:00'),

       ('1010', '119.180.192.10', '2021-09-27 14:00:00', '2021-09-27 14:30:00'),

       ('1010', '119.180.192.10', '2021-10-09 08:50:00', '2021-10-09 10:20:00');

--7.商品价格变更明细表

DROP TABLE IF EXISTS sku_price_modify_detail;

CREATE TABLE sku_price_modify_detail

(

    `sku_id`      string comment '商品id',

    `new_price`   decimal(16, 2) comment '更改后的价格',

    `change_date` string comment '变动日期'

) COMMENT '商品价格变更明细表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table sku_price_modify_detail

values ('1', 1900, '2021-09-25'),

       ('1', 2000, '2021-09-26'),

       ('2', 80, '2021-09-29'),

       ('2', 10, '2021-09-30'),

       ('3', 4999, '2021-09-25'),

       ('3', 5000, '2021-09-26'),

       ('4', 5600, '2021-09-26'),

       ('4', 6000, '2021-09-27'),

       ('5', 490, '2021-09-27'),

       ('5', 500, '2021-09-28'),

       ('6', 1988, '2021-09-30'),

       ('6', 2000, '2021-10-01'),

       ('7', 88, '2021-09-28'),

       ('7', 100, '2021-09-29'),

       ('8', 800, '2021-09-28'),

       ('8', 600, '2021-09-29'),

       ('9', 1100, '2021-09-27'),

       ('9', 1000, '2021-09-28'),

       ('10', 90, '2021-10-01'),

       ('10', 100, '2021-10-02'),

       ('11', 66, '2021-10-01'),

       ('11', 50, '2021-10-02'),

       ('12', 35, '2021-09-28'),

       ('12', 20, '2021-09-29');

--8.配送信息表

DROP TABLE IF EXISTS delivery_info;

CREATE TABLE delivery_info

(  `delivery_id` string comment '配送单id',

    `order_id`    string comment '订单id',

    `user_id`     string comment '用户id',

    `order_date`  string comment '下单日期',

    `custom_date` string comment '期望配送日期'

) COMMENT '邮寄信息表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table delivery_info

values ('1', '1', '101', '2021-09-27', '2021-09-29'),

       ('2', '2', '101', '2021-09-28', '2021-09-28'),

       ('3', '3', '101', '2021-09-29', '2021-09-30'),

       ('4', '4', '101', '2021-09-30', '2021-10-01'),

       ('5', '5', '102', '2021-10-01', '2021-10-01'),

       ('6', '6', '102', '2021-10-01', '2021-10-01'),

       ('7', '7', '102', '2021-10-01', '2021-10-03'),

       ('8', '8', '102', '2021-10-02', '2021-10-02'),

       ('9', '9', '103', '2021-10-02', '2021-10-03'),

       ('10', '10', '103', '2021-10-02', '2021-10-04'),

       ('11', '11', '103', '2021-10-02', '2021-10-02'),

       ('12', '12', '103', '2021-10-03', '2021-10-03'),

       ('13', '13', '104', '2021-10-03', '2021-10-04'),

       ('14', '14', '104', '2021-10-03', '2021-10-04'),

       ('15', '15', '104', '2021-10-03', '2021-10-03'),

       ('16', '16', '104', '2021-10-03', '2021-10-03'),

       ('17', '17', '105', '2021-10-04', '2021-10-04'),

       ('18', '18', '105', '2021-10-04', '2021-10-06'),

       ('19', '19', '105', '2021-10-04', '2021-10-06'),

       ('20', '20', '105', '2021-10-04', '2021-10-04'),

       ('21', '21', '106', '2021-10-04', '2021-10-04'),

       ('22', '22', '106', '2021-10-05', '2021-10-05'),

       ('23', '23', '106', '2021-10-05', '2021-10-05'),

       ('24', '24', '106', '2021-10-05', '2021-10-07'),

       ('25', '25', '107', '2021-10-05', '2021-10-05'),

       ('26', '26', '107', '2021-10-05', '2021-10-06'),

       ('27', '27', '107', '2021-10-06', '2021-10-06'),

       ('28', '28', '107', '2021-10-06', '2021-10-07'),

       ('29', '29', '108', '2021-10-06', '2021-10-06'),

       ('30', '30', '108', '2021-10-06', '2021-10-06'),

       ('31', '31', '108', '2021-10-07', '2021-10-09'),

       ('32', '32', '108', '2021-10-07', '2021-10-09'),

       ('33', '33', '109', '2021-10-07', '2021-10-08'),

       ('34', '34', '109', '2021-10-07', '2021-10-08'),

       ('35', '35', '109', '2021-10-08', '2021-10-10'),

       ('36', '36', '109', '2021-10-08', '2021-10-09'),

       ('37', '37', '1010', '2021-10-08', '2021-10-10'),

       ('38', '38', '1010', '2021-10-08', '2021-10-10'),

       ('39', '39', '1010', '2021-10-08', '2021-10-09'),

       ('40', '40', '1010', '2021-10-08', '2021-10-09');

--9.好友关系表

--注:表中一行数据中的两个user_id,表示两个用户互为好友。

DROP TABLE IF EXISTS friendship_info;

CREATE TABLE friendship_info(

    `user1_id` string comment '用户1id',

    `user2_id` string comment '用户2id'

) COMMENT '用户关系表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table friendship_info

values ('101', '1010'),

       ('101', '108'),

       ('101', '106'),

       ('101', '104'),

       ('101', '102'),

       ('102', '1010'),

       ('102', '108'),

       ('102', '106'),

       ('102', '104'),

       ('102', '102'),

       ('103', '1010'),

       ('103', '108'),

       ('103', '106'),

       ('103', '104'),

       ('103', '102'),

       ('104', '1010'),

       ('104', '108'),

       ('104', '106'),

       ('104', '104'),

       ('104', '102'),

       ('105', '1010'),

       ('105', '108'),

       ('105', '106'),

       ('105', '104'),

       ('105', '102'),

       ('106', '1010'),

       ('106', '108'),

       ('106', '106'),

       ('106', '104'),

       ('106', '102'),

       ('107', '1010'),

       ('107', '108'),

       ('107', '106'),

       ('107', '104'),

       ('107', '102'),

       ('108', '1010'),

       ('108', '108'),

       ('108', '106'),

       ('108', '104'),

       ('108', '102'),

       ('109', '1010'),

       ('109', '108'),

       ('109', '106'),

       ('109', '104'),

       ('109', '102'),

       ('1010', '1010'),

       ('1010', '108'),

       ('1010', '106'),

       ('1010', '104'),

       ('1010', '102');

--10.收藏信息表

DROP TABLE IF EXISTS favor_info;

CREATE TABLE favor_info

(   `user_id`     string comment '用户id',

    `sku_id`      string comment '商品id',

    `create_date` string comment '收藏日期'

) COMMENT '用户收藏表'

    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

--数据装载

insert overwrite table favor_info

values ('101', '3', '2021-09-23'),

       ('101', '12', '2021-09-23'),

       ('101', '6', '2021-09-25'),

       ('101', '10', '2021-09-21'),

       ('101', '5', '2021-09-25'),

       ('102', '1', '2021-09-24'),

       ('102', '2', '2021-09-24'),

       ('102', '8', '2021-09-23'),

       ('102', '12', '2021-09-22'),

       ('102', '11', '2021-09-23'),

       ('102', '9', '2021-09-25'),

       ('102', '4', '2021-09-25'),

       ('102', '6', '2021-09-23'),

       ('102', '7', '2021-09-26'),

       ('103', '8', '2021-09-24'),

       ('103', '5', '2021-09-25'),

       ('103', '6', '2021-09-26'),

       ('103', '12', '2021-09-27'),

       ('103', '7', '2021-09-25'),

       ('103', '10', '2021-09-25'),

       ('103', '4', '2021-09-24'),

       ('103', '11', '2021-09-25'),

       ('103', '3', '2021-09-27'),

       ('104', '9', '2021-09-28'),

       ('104', '7', '2021-09-28'),

       ('104', '8', '2021-09-25'),

       ('104', '3', '2021-09-28'),

       ('104', '11', '2021-09-25'),

       ('104', '6', '2021-09-25'),

       ('104', '12', '2021-09-28'),

       ('105', '8', '2021-10-08'),

       ('105', '9', '2021-10-07'),

       ('105', '7', '2021-10-07'),

       ('105', '11', '2021-10-06'),

       ('105', '5', '2021-10-07'),

       ('105', '4', '2021-10-05'),

       ('105', '10', '2021-10-07'),

       ('106', '12', '2021-10-08'),

       ('106', '1', '2021-10-08'),

       ('106', '4', '2021-10-04'),

       ('106', '5', '2021-10-08'),

       ('106', '2', '2021-10-04'),

       ('106', '6', '2021-10-04'),

       ('106', '7', '2021-10-08'),

       ('107', '5', '2021-09-29'),

       ('107', '3', '2021-09-28'),

       ('107', '10', '2021-09-27'),

       ('108', '9', '2021-10-08'),

       ('108', '3', '2021-10-10'),

       ('108', '8', '2021-10-10'),

       ('108', '10', '2021-10-07'),

       ('108', '11', '2021-10-07'),

       ('109', '2', '2021-09-27'),

       ('109', '4', '2021-09-29'),

       ('109', '5', '2021-09-29'),

       ('109', '9', '2021-09-30'),

       ('109', '8', '2021-09-26'),

       ('1010', '2', '2021-09-29'),

       ('1010', '9', '2021-09-29'),

       ('1010', '1', '2021-10-01');

 二、练习题

1. 查询累积销量排名第二的商品

题目需求查询订单明细表(order_detail)中销量(下单件数)排名第二的sku_id,如果不存在返回null,如果存在多个排名第二的商品则需要全部返回。期望结果如下:

分析

代码分析

  1. 按 sku_id分组求和

 Drop view if exists v_1;

Create view v_1 as select sku_id,sum(sku_num) order_num from order_detail  group by sku_id;

Select * from v_1;

  1. 根据order_num 求名次

 Drop view if exists v_2;

Create view v_1 as  select  sku_id,order_num,dense_rank()  over(order by order_num)rk from v_1;

注意:(1)dense_rank() over(order by sku_num desc)中over() 没有partition by 字段

根据整个记录集范围内按销量进行排序,有可能存在并列情况

考虑有并列情况

(2) 以下排序函数,若第3名有并列情况,在并列名次区别

row_number() over(order by sku_num desc)

  1,2,3,4,5

rnak()dense_rank() over(order by sku_num desc)

1,2,3,3,4

dense_rank() over(order by sku_num desc)

1,2,3,3,5

  1. 显示结果

Select sku_id  from v_1  where rk=2;

代码实现

hive>

select sku_id

from ( select sku_id

         from ( select sku_id,order_num,dense_rank() over (order by order_num desc) rk

                  from (

                     select sku_id,sum(sku_num) order_num from order_detail   group by sku_id

                       ) t1

              ) t2

         where rk = 2

     ) t3

         right join --为保证,没有第二名的情况下,返回null

     ( select 1 ) t4;

2. 查询至少连续三天下单用户

 题目需求  查询订单信息表(order_info)中最少连续3天下单的用户id,期望结果如下:

   分析:

    本题重点   user_id,create_date

(1)分组并去重 ( user_id, create_date相同的只有一条记录)

  Drop view if exists v_1;

 Create view v_1 as select distinct  user_id, create_date from order_info  

   

(2)根据user_id 分组,按create_date字段排序

 Drop view if exists v_2;

create view  v_2 as  

select user_id, create_date, date_sub(create_date, row_number() over (partition by user_id order by create_date)) flag from v_1;

(3)

  Select user_id  from v_2  group by user_id,flag having count(flag)>=3;

   结果如下:

   

代码:

hive>

select distinct user_id

from (

         select user_id

         from ( select user_id, create_date, date_sub(create_date, row_number() over (partition by user_id order by create_date)) flag

                  from ( select user_id, create_date from order_info  group by user_id, create_date

                       ) t1 --同一天同一个用户可以下单多次,所以要分组 同一天可能多个用户下单,进行去重

              ) t2 -- 判断一串日期是否连续:若连续,用这个日期减去它的排名,会得到一个相同的结果

         group by user_id, flag

         having count(flag) >= 3 -- 连续下单大于等于三天

     ) t3;

3. 查询各品类销售商品的种类数及销量最高的商品

题目需求:从订单明细表(order_detail)统计各品类销售出的商品种类数累积销量最好的商品,期望结果如下:

category_id

category_name

sku_id

name

order_num

sku_cnt

1

数码

2

手机壳

302

4

2

厨卫

8

微波炉

253

4

3

户外

12

遮阳伞

349

4

category_id(分类id)

category_name(分类名称)

sku_id(销量最好的商品id)

name(商品名称)

order_num(销量最好的商品销量)

sku_cnt(商品种类数量)

本题题义

   显示6个字段 连接三个表,

       其中order_num是同一商品销量

ku_cnt同一类别商品类别

  1. 统计各商品销售数量

Drop view if exists v_1;

Create view v_1  as select sku_id, sum(sku_num) order_num  from order_detail  group by sku_id;

Select *  v_1;

  1. 三表内接

Drop view if exists v_2;

Create view v_2 as  Select cate.category_id,cate.category_name ,v.sku_id,sku.name,v.order_num from v_1 v

join sku_info sku

join category_info cate on  sku.sku_id=v.sku_id and cate.category_id=sku.category_id

  1. 统计商品类别数量,根据商品类别 进行销售数量排序

Drop view if exists v_3;

Create view v_3 as Select category_id,category_name,sku_id,name,order_num,

rank() over(partition by category_id order by order_num desc) rk,

count(*) over(partition by category_id) s

From  v_2

4查询结果

 Select * from v_3  where rk=1;

 代码:            

hive>

select category_id,

       category_name,

       sku_id,

       name,

       order_num,

       sku_cnt

from (  select od.sku_id,

                sku.name,

                sku.category_id,

                cate.category_name,

                order_num,

                rank() over (partition by sku.category_id order by order_num desc) rk,

                count(od.sku_id) over (partition by sku.category_id) sku_cnt

           from ( select sku_id, sum(sku_num) order_num  from order_detail  group by sku_id ) od

           left join   sku_info sku  on od.sku_id = sku.sku_id

           left join   category_info cate   on sku.category_id = cate.category_id ) t1

where rk = 1;

4.查询用户的累计消费金额及VIP等级

题目需求

从订单信息表(order_info)中统计每个用户截止其每个下单日期的累积消费金额,以及每个用户在其每个下单日期的VIP等级。用户vip等级根据累积消费金额计算,计算规则如下:

设累积消费总额为X,

若0=<X<10000,则vip等级为普通会员

若10000<=X<30000,则vip等级为青铜会员

若30000<=X<50000,则vip等级为白银会员

若50000<=X<80000,则vip为黄金会员

若80000<=X<100000,则vip等级为白金会员

若X>=100000,则vip等级为钻石会员

期望结果如下:

user_id(用户id)  create_date(下单日期) sum_so_far

(截至每个下单日期的累计下单金额)

vip_level(每个下单日期的VIP等级)

101   2021-09-27   29000.00  青铜会员

101   2021-09-28   99500.00  白金会员

101   2021-09-29   142800.00  钻石会员

101   2021-09-30   143660.00  钻石会员

102   2021-10-01   171680.00  钻石会员

102    2021-10-02  177850.00  钻石会员

103    2021-10-02  69980.00   黄金会员

103    2021-10-03  75890.00  黄金会员

104    2021-10-03  89880.00  白金会员

105   2021-10-04   120100.00  钻石会员

106   2021-10-04    9390.00    普通会员

106   2021-10-05  119150.00  钻石会员

107   2021-10-05   69850.00  黄金会员

107  2021-10-06    124150.00 钻石会员

108  2021-10-06   101070.00钻石会员

108   2021-10-07  155770.00 钻石会员

109  2021-10-07  129480.00钻石会员

109   2021-10-08  153500.00钻石会员

1010   2021-10-08  51950.00  黄金会员

分析:

  1. 用户每个下单日期金额  根据user_id、create_date分组求和

   下次日期金额采用表 order.info字段total_amount

        同一单中 表order.detail中 price*sku_num和与order.info字段total_amount相等。

  Drop view if exists v_1;

Create view v_1 as select user_id,create_date,sum(total_amount)s from order_info group by   user_id,create_date;

Select * from v_1;

  1. 累计求和

 Drop view if exists v_2;

Create view v_2  as  select *,sum(s) over(partition by user_id order by create_date) sum_so_far

From  v_1;

  1. 查询结果

 Select user_id,create_date, sum_so_far,

      case  when sum_so_far >= 100000 then '钻石会员'

            when sum_so_far >= 80000 then '白金会员'

            when sum_so_far >= 50000 then '黄金会员'

            when sum_so_far >= 30000 then '白银会员'

            when sum_so_far >= 10000 then '青铜会员'

            when sum_so_far >= 0 then '普通会员'

        end vip_level

      From v_2;

注意:在查询视图时中文字段出现????

处理办法:

在MySQL中执行下面的两句修改命令,将编码修改为utf8

#mysql -uroot -p

Mysql>use hive ;

 mysql> ALTER TABLE `TBLS` MODIFY COLUMN VIEW_EXPANDED_TEXT mediumtext CHARACTER SET utf8;

mysql> ALTER TABLE `TBLS` MODIFY COLUMN VIEW_ORIGINAL_TEXT mediumtext CHARACTER SET utf8;

代码实现

hive>

select user_id,

       create_date,

       sum_so_far,

       case

           when sum_so_far >= 100000 then '钻石会员'

           when sum_so_far >= 80000 then '白金会员'

           when sum_so_far >= 50000 then '黄金会员'

           when sum_so_far >= 30000 then '白银会员'

           when sum_so_far >= 10000 then '青铜会员'

           when sum_so_far >= 0 then '普通会员'

           end vip_level

from (

         select user_id,

                create_date,

                sum(total_amount_per_day) over (partition by user_id order by create_date) sum_so_far

         from (

                  select user_id,

                         create_date,

                         sum(total_amount) total_amount_per_day

                  from order_info

                  group by user_id, create_date

              ) t1

     ) t2;

5. 查询首次下单后第二天连续下单的用户比率

题目需求

从订单信息表(order_info)中查询首次下单后第二天仍然下单的用户占所有下单用户的比例,结果保留一位小数,使用百分数显示,期望结果如下:

percentage

60.0%

分析

分析

  1. 取两字段并去重

   Drop view if exists v_1;

   Create view  v_1  as select user_id, create_date  from order_info  group by user_id, create_date

  1. 根据user_id分组,  按create_date排名次,为下一步,保证每一分组至少有两条记录

 Drop view if exists v_2;

   Create view  v_2  as  select user_id, create_date, rank() over (partition by user_id order by create_date) rk

               From  v_1

  1. 每组最多取两个记录 ,每组中取一个最大,最小

Drop view if exists v_3;

Create view v_3  as select user_id, min(create_date) buy_date_first, max(create_date) buy_date_second  from v_2  where rk<=2 group by user_id

(4)

select concat(round(sum(if(datediff(buy_date_second, buy_date_first) = 1, 1, 0)) / count(*) * 100, 1), '%') percentage  from  v_3

注意:

 x= datediff(buy_date_second, buy_date_first) = buy_date_second- buy_date_first

  If(x = 1, 1, 0)  //如果=1

 统计相邻天下单用户数

sum(if(datediff(buy_date_second, buy_date_first) = 1, 1, 0))  相邻天下单+1 否则+0

Concat(a,b) 将a与b连接成字符串

代码实现

select concat(round(sum(if(datediff(buy_date_second, buy_date_first) = 1, 1, 0)) / count(*) * 100, 1), '%') percentage

from (select user_id, min(create_date) buy_date_first, max(create_date) buy_date_second

         from ( select user_id, create_date, rank() over (partition by user_id order by create_date) rk

               from ( select user_id, create_date  from order_info  group by user_id, create_date

                       ) t1

              ) t2

         where rk <= 2  group by user_id

     ) t3;

6.每个商品销售首年年份、销售数量和销售金额

题目需求:从订单明细表(order_detail)统计每个商品销售首年的年份,销售数量和销售总额。

期望结果如下:

sku_id(商品id)year(销售首年年份)order_num(首年销量)order_amount(首年销售金额)

分析

  1. 转换日期并按年求和

 Drop view if exists v_1;

Create view v_1  as select sku_id,year(create_date)year, sku_num,price,price*sku_num s  from order_detail ;

(2)求和

 Drop view if exists v_2;

Create view v_2  as select sku_id,year, sum(sku_num)sku_sum,sum(s)salary  from v_1  group by  sku_id,year;

(3) 排序

   Drop view if exists v_3;

Create view v_3  as select sku_id,year,sku_sum,salary,rank() over(partition by sku_id order by year)rk from v_2

  1. 查询

 Select sku_id,year,sku_sum,salary from v_3 where rk=1;

代码

select sku_id,

       year(create_date) year,

       sum(sku_num) order_num,

       sum(price*sku_num) order_amount

from (    select order_id,

                sku_id,

                price,

                sku_num,

                create_date,

                rank() over (partition by sku_id order by year(create_date)) rk

         from order_detail

     ) t1

where rk = 1

group by sku_id,year(create_date);

7 筛选去年总销量小于100的商品

 题目需求:从订单明细表(order_detail)中筛选出去年总销量小于100的商品及其销量,假设今天的日期是2022-01-10,不考虑上架时间小于一个月(30天)的商品,期望结果如下:

 分析:

首先由生成记录集

Sku_id   sku_num  create_date   name   from_date(上架时间)

筛选记录  条件 year(create_date)=’2021’ and  from_date<date_sub(‘2022-01-10’,30)  

  1. 连接两表,生成需要字段,不考虑上架时间小于一个月的商品

 Drop view if exists v_1;

Create view v_1 as select t1.sku_id,t1.sku_num,t1.create_date,t2.name,t2.from_date  from order_detail t1 join sku_info t2  on t1.sku_id=t2.sku_id where datediff('2022-01-10',t2.from_date)>=30 and year(create_date)='2021'

  1. 销量求和,按条件查询

  Select sku_id,sum(sku_num) from v_1  group by sku_id  having sum(sku_num)<100;

   

8 查询每日新用户数

题目需求:从用户登录明细表(user_login_detail)中查询每天的新增用户数,若一个用户在某天登录了,且在这一天之前没登录过,则任务该用户为这一天的新增用户。期望结果如下:

login_date_first(日期)user_count(新增用户数)

分析:

(1)求每个用户最小日期

 select  user_id, min(date_format(login_ts,'yyyy-MM-dd')) login_date_first  from user_login_detail  group by user_id

  1. 先求每个用户首次登录的时间
  2. 首次登录时间分组统计用户的数量就是那一天新增用户数量

注意:由于login_ts字段值格式是,'yyyy-MM-dd  hh:mm:ss' 要格式化为,'yyyy-MM-dd'

代码:

select  login_date_first,  count(*) user_count

from(  select  user_id, min(date_format(login_ts,'yyyy-MM-dd'))  login_date_first

             from user_login_detail  group by user_id )t1

group by login_date_first;

9.统计每个商品销量最高的日期

题目需求:从订单明细表(order_detail)中统计出每种商品销售件数最多的日期及当日销量,如果有同一商品多日销量并列的情况,取其中的最小日期。

sku_id(商品id)  create_date(销量最高的日期)  sum_num(销量)

分析

  1. 统计每种商品每日销量

 Drop view if exists v_1;

Create view v_1 as   select sku_id, create_date, sum(sku_num) sum_num

                  from order_detail  group by sku_id, create_date

  1. 销量排序

Drop view if exists v_2;

Create view v_2 as   select sku_id,  create_date,  sum_num,

                row_number() over (partition by sku_id order by sum_num desc, create_date asc) rn

         from v_1

 注意  如果有同一商品多日销量并列的情况,取其中的最小日期 是这条语句实现

row_number() over (partition by sku_id order by sum_num desc,create_date asc) rn

                                     按销量逆序          日期顺序排序

所以表中第一记录是销量最大,每天销量  

  1. 查询结果

 Select  * from v_2 wherern=1;

代码:

select sku_id,

       create_date,

       sum_num

from (

         select sku_id,

                create_date,

                sum_num,

                row_number() over (partition by sku_id order by sum_num desc,create_date asc) rn

         from (

                  select sku_id,

                         create_date,

                         sum(sku_num) sum_num

                  from order_detail

                  group by sku_id, create_date

              ) t1

     ) t2

where rn = 1;

     

10 查询销售件数高于品类平均数的商品

题目需求 从订单明细表(order_detail)中查询累积销售件数高于其所属品类平均数的商品。

分析

 每种商品销售件数高于品类(category_id)平均数

   显示sku_id  name   sum_num cate_avg_num

   本题关键是某种商品品类平均数求法

      每种商品都属于某一品类 ,每一品类都 有一个category_id, 品类平均数是将品类所有商品销量和除以这类商品数量。

分析

  1. 统计每种商品销量和

  Drop view if exists v_1;

  Create view  v_1 as select sku_id,sum(sku_num)sum_num from order_detail group by sku_id ;

  Select * from v_1;

  1. 统计每种商品按品种类型求平均销售数

  Create view v_2 as select t1.sku_id,t2.category_id,t2.name,t1.sum_num,

                         Avg(t1.sum_num) over(partition by t2.category_id) cate_avg_num

From v_1 t1

Left join sku_info t2  on t1.sku_id=t2.sku_id

  1. 查询sum_num>cate_avg_num记录

代码:

select sku_id,

       name,

       sum_num,

       cate_avg_num

from ( select od.sku_id, category_id,name, sum_num,

                avg(sum_num) over (partition by category_id ) cate_avg_num

         from ( select sku_id, sum(sku_num) sum_num from order_detail  group by sku_id

              ) od

              left join sku_info sku

              on od.sku_id = sku.sku_id) t1

where sum_num > cate_avg_num;

11 用户注册、登录、下单综合统计

题目需求从用户登录明细表(user_login_detail)和订单信息表(order_info)中查询每个用户的注册日期(首次登录日期)、总登录次数以及其在2021年的登录次数、订单数和订单总额。期望结果如下:

user_id(用户id)register_date(注册日期) total_login_count(累积登录次数) login_count_2021(2021年登录次数)

order_count_2021(2021年下单次数)    order_amount_2021(2021年订单金额)

分析

  1.  此题从表user_login_detail找出用户的最早最早注册日期,统计登录次数,2021登录次数
  2. order_info表找出 2021年订单数和订单额
  3.  两个查询结果join

(1)找出 user_login_detail中最早注册日期,统计登录次数,2021登录次数

Drop view if exists v_1;

Create view v_1 as  select user_id,

            min(date_format(login_ts, 'yyyy-MM-dd')) register_date,

           count(1) total_login_count,

           count(if(year(login_ts) = '2021', 1, null)) login_count_2021

            from user_login_detail

            group by user_id

(2)查询用户2021销售当量及销售额

Drop view if exists v_2;

Create view v_2 as  select user_id,

               count(order_id) order_count_2021,

               sum(total_amount) order_amount_2021

         from order_info

         where year(create_date) = '2021'

         group by user_id

代码:

Select login.user_id,register_date,total_login_count,login_count_2021,order_count_2021, order_amount_2021

from (  select user_id,

            min(date_format(login_ts, 'yyyy-MM-dd')) register_date,

            count(1) total_login_count,

           count(if(year(login_ts) = '2021', 1, null)) login_count_2021

         from user_login_detail

         group by user_id

     ) login

         join

     (

         select user_id,

               count(order_id) order_count_2021,

               sum(total_amount) order_amount_2021

         from order_info

         where year(create_date) = '2021'

         group by user_id

     ) oi

     on login.user_id = oi.user_id;

12 查询指定日期的全部商品价格

 题目需求:从商品价格修改明细表(sku_price_modify_detail)中查询截至到2021年10月01号的最新商品价格,假设所有商品初始价格默认都是99。期望结果如下:

sku_id(商品id)price(商品价格)

1               2000.00

2               10.00

3               5000.00

4               6000.00

5               500.00

6               2000.00

7               100.00

分析:

分析

表结构分析 sku_info、 sku_price_modify_detail

sku_info 商品表

sku_price_modify_detail  商品价格变化表

题意:根据表sku_price_modify_detail表2021-10-01最新价格,查询sku_info表中商品价格,若 商品价格没变动为默认值99

1找出2021商品最新价格

DROP view  IF EXISTS v_1;

   Create view v_1 as  select sku_id, new_price, change_date,

                         row_number() over (partition by sku_id order by change_date desc) rn

                  from sku_price_modify_detail

                  where change_date <= '2021-10-01'

注意, row_number() over (partition by sku_id order by change_date desc) rn  根据商品ID号分组,每组按 change_date 逆序排名 ,第一名为最新日期的价格。

(3)根据表 sku_info确定商品新价格

select t1.sku_id, nvl(new_price, 99) price

from sku_info t1

         left join (select sku_id, new_price from v_1 where rn=1)t2

           on t1.sku_id=t2.sku_id

  

注意:商品价格没变动就是将表sku_info 与v_1 左连后 字段值new_price为空,否则为变化值

nvl(new_price, 99) :如果new_price字段值为null,那么返回99,如果不为null,返回new_price本身值

代码:

select sku_info.sku_id,

       nvl(new_price, 99) price

from sku_info

         left join

     (   select sku_id, new_price

         from ( select sku_id, new_price, change_date,

                         row_number() over (partition by sku_id order by change_date desc) rn

                  from sku_price_modify_detail

                  where change_date <= '2021-10-01'

              ) t1

         where rn = 1

     ) t2

     on sku_info.sku_id = t2.sku_id;

13 即时订单比例

题目需求:订单配送中,如果期望配送日期和下单日期相同,称为即时订单,如果期望配送日期和下单日期不同,称为计划订单。

请从配送信息表(delivery_info)中求出每个用户的首单(用户的第一个订单)中即时订单的比例,保留两位小数,以小数形式显示。期望结果如下:

percentage

0.6

分析:delivery_info结构

 `delivery_id  string comment '配送单id',

    order_id    string comment '订单id',

    user_id    string comment '用户id',

    order_date  string comment '下单日期',

    custom_date  string comment '期望配送日期'

Drop view if exists v_1;

 Create view v_1 as select  delivery_id,  user_id,  order_date, custom_date,

        row_number() over (partition by user_id order by order_date) rn

from delivery_info;

注意:排序是为了获得首单

Select * from v_1;

  1. 找出rn=1的记录,再统计 所有记录数、order_date=custom_date记录数

     select

       round(sum(if(order_date=custom_date,1,0))/count(*),2) percentage

     From v_1   where rn=1;

  Round(,) ---取小数位数

Sum() 对1或0求和 统计日期相等记录数

  if(order_date=custom_date,1,0)  日期相等 为1 ,否则为0

Count(*)所有记录数

代码:

select

    round(sum(if(order_date=custom_date,1,0))/count(*),2) percentage

from

(    select  delivery_id,  user_id,  order_date, custom_date,

        row_number() over (partition by user_id order by order_date) rn

    from delivery_info

)t1

where rn=1;

2.14 向用户推荐朋友收藏的商品

题目需求:现需要请向所有用户推荐其朋友收藏 但是用户自己未收藏的商品,请从好友关系表(friendship_info)和收藏表(favor_info)中查询出应向哪位用户推荐哪些商品。期望结果如下:

部分结果展示

user_id(用户id)sku_id(应向该用户推荐的商品id)

101           2

101           4

101           7

101           9

101           8

101          11

101          1

分析:

friendship_info   用户关系表'    

user1_id(用户)       user2_id(用户朋友)

 favor_info 爱好表

user_id (用户id) sku_id(商品id) create_date (收藏日期) 

解答本题关键是 找出自己的朋友收藏商品号

               找出自己收获商品号

 然后求这两个集中差集

代码:

   --  (A except B : 返回仅出现在A中的记录。注: except 时两个表的字段名、字段顺序要一致)

--用户user1_id 的朋友收藏的商品sku_id

select  t.user1_id as user_id, sku_id

from friendship_info t join favor_info t1    on t.user2_id =t1.user_id           

except --取补集(差集)

-- 2)自己收藏的商品

select user_id, sku_id  from favor_info;

15. 查询所有用户的连续登录两天及以上的日期区间

题目需求  从登录明细表(user_login_detail)中查询出,所有用户的连续登录两天及以上的日期区间,以登录时间(login_ts)为准。期望结果如下:

user_id(用户id)  start_date(开始日期)   end_date(结束日期)

101   2021-09-27   2021-09-30

102   2021-10-01   2021-10-02

106   2021-10-04   2021-10-05

107   2021-10-05   2021-10-06

分析:

分析:

  1. 表结构

   Desc  user_login_detail;

   

  1.  变换日期格式 去重(分组每天只有一个记录)

Drop view if exists v_1;

      Create view v_1 as select user_id,date_format(login_ts,'yyyy-MM-dd') login_date from user_login_detail group by  user_id,date_format(login_ts,'yyyy-MM-dd');

注:user_id,date_format(login_ts,'yyyy-MM-dd') 去重

3根据用户分组,按日期排序 ,为确定连续下单准备

drop view if exists v_2;

create view v_2 as select user_id,login_date, row_number() over (partition by user_id order by login_date) rn  from  v_1;

  1. 得判断下单连续日期flag

drop view if exists v_3;

create view v_3 as select user_id,login_date, date_sub(login_date, rn) flag  from v_2;

注意将日期排名次 ,然后与名次相减 日期连续的,flag都相同

4在连续下单客户中找出最早,最迟下单日期

select user_id,

       min(login_date) start_date,

       max(login_date) end_date

From v_3 group by user_id,flag  having count(*)>=2

注意这里涉及一个算法

  统计flag次数 >=2,说明日期有连续的。

代码:

select user_id,

       min(login_date) start_date,

       max(login_date) end_date

from (

         select user_id,

                login_date,

                date_sub(login_date, rn) flag

         from (

                  select user_id,

                         login_date,

                         row_number() over (partition by user_id order by login_date) rn

                  from (

                           select user_id,

                                  date_format(login_ts, 'yyyy-MM-dd') login_date

                           from user_login_detail  group by user_id, date_format(login_ts, 'yyyy-MM-dd')

                       ) t1

              ) t2

     ) t3

group by user_id, flag

having count(*) >= 2;

16 男性和女性每日购物总金额统计

 题目需求 从订单信息表(order_info)和用户信息表(user_info)中,分别统计每天男性和女性用户的订单总金额,如果当天男性或者女性没有购物,则统计结果为0。期望结果如下:

create_date(日期)total_amount_male(男性用户总金额)total_amount_female(女性用户总金额)

2021-09-27         29000.00                             0.00

2021-09-28          70500.00                             0.00

2021-09-29          43300.00                             0.00

2021-09-30          860.00                               0.00

2021-10-01          0.00                                 171680.00

select create_date,

       sum(if(gender = '男', total_amount, 0)) total_amount_male,

       sum(if(gender = '女', total_amount, 0)) total_amount_female

from order_info oi

         left join

     user_info ui

     on oi.user_id = ui.user_id

group by create_date;

分析

表结构

Desc order_info;

Desc user_info

需要两个表:order_info表和user_info表

左连接--->根据下单日期分组--->通过if函数判断是男是女--->sum来统计总数

17 订单金额趋势分析

题目需求:查询截止每天的最近3天内的订单金额总和以及订单金额日平均值,保留两位小数,四舍五入。期望结果如下:

create_date(日期) total_3d(最近3日订单金额总和) avg_ad(最近3日订单金额日平均值)

2021-09-27  29000.00   29000.00  

2021-09-28   99500.00   49750.00

2021-09-29   142800.00  47600.00

2021-09-30   114660.00   38220.00

2021-10-01    215840.00   71946.67

2021-10-02   248690.00   82896.67

2021-10-03  343620.00    114540.00

2021-10-04   301430.00   100476.67

分析:

  1. 统计每日订单销售额

   Drop view if exists v_1;

    Create view v_1 as  select create_date, sum(total_amount) total_amount_by_day

                      from order_info   group by create_date;

  

(2)统计近三天销售量及平均销售量

select create_date,

       sum(total_amount_by_day) over (order by create_date rows between 2 preceding and current row ) total_3d,

       round(avg(total_amount_by_day) over (order by create_date rows between 2 preceding and current row ), 2) avg_3d

From  v_1

sum(total_amount_by_day) over (order by create_date  rows between 2 preceding and current row )

每行和是三行之和

avg(total_amount_by_day) over (order by create_date rows between 2 preceding and current row )

求和及平均

注意: order by create_date   按日期排序,

       rows between 2 preceding and current row  范围:当前行及前二行

select create_date,

       round(sum(total_amount_by_day) over (order by create_date rows between 2 preceding and current row ),2) total_3d,

       round(avg(total_amount_by_day) over (order by create_date rows between 2 preceding and current row ), 2) avg_3d

from (  select create_date, sum(total_amount) total_amount_by_day

         from order_info   group by create_date

     ) t1;

18 购买过商品1和商品2但是没有购买商品3的顾客

题目需求 从订单明细表(order_detail)中查询出所有购买过商品1和商品2,但是没有购买过商品3的用户,

 分析

结构

order_detail表

     

order_info表

1根据用户分组,每个用户买过商品合并成一个集合skus

    Drop view if exists v_1;

create  view v_1 as  select user_id, collect_set(sku_id) skus

         from order_detail od  left join  order_info oi on od.order_id = oi.order_id

         group by user_id

  1. 找出含商品1,2,3客户

select user_id from v_1  where array_contains(skus ‘1’)

                          And array_contains(skus ‘2’)

                         And !array_contains(skus ‘3’)

   本题主要是两个函数应用:

      collect_set()将一个组的元素合并成一个集合

      Array_contains(,)判断集合是否包含某种元素

select user_id

from (  select user_id, collect_set(sku_id) skus

         from order_detail od

                  left join

              order_info oi

              on od.order_id = oi.order_id

         group by user_id

     ) t1

where array_contains(skus, '1')

  and array_contains(skus, '2')

  and !array_contains(skus, '3');

分析

19 统计每日商品1和商品2销量的差值

题目需求:从订单明细表(order_detail)中统计每天商品1和商品2销量(件数)的差值(商品1销量-商品2销量),期望结果如下:

create_date    diff

2021-09-27     2

2021-10-01    -10

分析

  1. 求每天每个商品销售和

Drop view if exists v_1;

 Create view v_1 as Select sku_id,create_date, sum(sku_num)s from order_detail where sku_id='1' or sku_id='2'  group by sku_id,create_date;

  

select create_date,

       sum(if(sku_id = '1', sku_num, 0)) - sum(if(sku_id = '2', sku_num, 0)) diff

from order_detail  group by create_date;

注意首先要找出符合条件suk_id=’1’ 和suk_id=’2’ order_detail表

如果没有条件就会出现0记录

20 查询出每个用户的最近三笔订单

题目需求:从订单信息表(order_info)中查询出每个用户的最近三笔订单,期望结果如下:

user_id  order_id    create_date

101      2         2021-09-28

101      3         2021-09-29

101      4         2021-09-30

102      5        2021-10-01

select user_id,

       order_id,

       create_date

from (   select user_id , order_id  , create_date

         , row_number() over (partition by user_id order by create_date desc) rk

         from order_info

     ) t1

where rk <= 3;

21查询每个用户登录日期的最大空档期

题目需求  从登录明细表(user_login_detail)中查询每个用户两个登录日期(以login_ts为准)之间的最大的空档期。统计最大空档期时,用户最后一次登录至今的空档也要考虑在内,假设今天为2021-10-10。期望结果如下:

user_id(用户id) max_diff(最大空档期)

101               10

102               9

103               10

104               9

105               6

106               5

107              10

 分析

(1)日期格式化,并去重(每个客户,每天登录时间只有一个)

 Drop view if exists v_1;

 Create view v_1 as select  user_id,

                date_format(login_ts,'yyyy-MM-dd') login_date

                from user_login_detail

                group by user_id,date_format(login_ts,'yyyy-MM-dd');

(2) 取得下一行日期

 Drop view if exists v_2;

  Create view v_2 as  select

            user_id,

            login_date,

            lead(login_date,1,'2021-10-10') over(partition by user_id order by login_date) next_login_date

        From  v_1;

注意窗口函数 lead(login_date,1,'2021-10-10')   取得当前行下一行日期,若下一行不存在,则为'2021-10-10'

(3)取得日期差值

Drop view if exists v_3;

  Create view v_3 as

select user_id, datediff(next_login_date,login_date) diff   From v_3

  注意函数datediff()取得两个日期差值

  (4)取差值每个用户最大值的记录

select  user_id, max(diff) max_diff   from v_3  group by user_id

代码:

select

    user_id,

    max(diff) max_diff

from

(

    select

        user_id,

        datediff(next_login_date,login_date) diff

    from

    (

        select

            user_id,

            login_date,

            lead(login_date,1,'2021-10-10') over(partition by user_id order by login_date) next_login_date

        from

        (

            select  user_id,

                date_format(login_ts,'yyyy-MM-dd') login_date

                from user_login_detail

               group by user_id,date_format(login_ts,'yyyy-MM-dd')

        )t1

    )t2

)t3

group by user_id;

22 查询相同时刻多地登陆的用户

题目需求:从登录明细表(user_login_detail)中查询在相同时刻,多地登陆(ip_address不同)的用户,期望结果如下:

user_id(用户id)

102

104

107

分析:

本题关键是要找出上次退出时间和IP

  1. 找出上次退出时间和ip

   Drop view if exists v_1;

Create view v_1 as select user_id, ip_address,login_ts,logout_ts,

                        -- 1) 计算上次登出时间(如果为null,则使用本次登录时间)

               lag(logout_ts, 1, login_ts) over (partition by user_id order by login_ts) last_logout_ts,

                           -- 2) 计算上次登录的ip地址(如果为null,则默认为 '000.000.000.000')

              lag(ip_address,1,'000.000.000.000') over (partition by user_id order by login_ts) last_ip_address

             from user_login_detail

注意:

 lag(logout_ts, 1, login_ts)  取当前记录前一条 如果存在,值为logout_ts 不存在取本条记录 login_ts值

  1. 查询结果

 select distinct user_id

from v_1   where login_ts < last_logout_ts  and ip_address != last_ip_address;

代码:

select distinct user_id

from (  select user_id, ip_address,login_ts,logout_ts,

        -- 1) 计算上次登出时间(如果为null,则使用本次登录时间)

       lag(logout_ts, 1, login_ts) over (partition by user_id order by login_ts) last_logout_ts,

       -- 2) 计算上次登录的ip地址(如果为null,则默认为 '000.000.000.000')

        lag(ip_address, 1, '000.000.000.000') over (partition by user_id order by login_ts) last_ip_address

         from user_login_detail

     ) t1

where login_ts < last_logout_ts  and ip_address != last_ip_address;

23 销售额完成任务指标的商品

题目需求:商家要求每个商品每个月需要售卖出一定的销售总额

假设1号商品销售总额大于21000,2号商品销售总额大于10000,其余商品没有要求

请写出SQL从订单详情表中(order_detail)查询连续两个月销售总额大于等于任务总额的商品

结果如下:

sku_id(商品id)

1

-- 求出1号商品 和 2号商品 每个月的购买总额 并过滤掉没有满足指标的商品

分析

  1. 求sku_id=’1’ sku_id=’2’ 每天销售额

   Drop view if exists  v_1;

Create view v_1 as select sku_id,create_date,sku_num*price day_sum  from order_detail where sku_id='1' or sku_id='2';

  

(1) 根据年月,按sku_i求和

Drop view if exists  v_2;

Create view v_2 as select date_format(create_date,'yyyy-MM')d,sku_id,sum(day_sum)mm_sum from v_1  group by date_format(create_date,'yyyy-MM'),sku_id;

(2) 计算上个月销售额

drop view if exists v_3;

   Create view v_3 as select sku_id, d,s,lag(mm_sum,1,0) over(partition by sku_id  order by d ) last_s1,  from v_2;

Select  *  from v_3;

    

  1. 从以上表知道,只要判断 sku_id=1 and s>,s1,s2

   Select  sku_id  from v_3  where  (sku_id='1' and mm_sum>21000 and last_s1>21000) or  (sku_id='2' and mm_sum>10000 and last_s1>10000);

  

24 根据商品销售情况进行商品分类

题目需求:从订单详情表中(order_detail)对 对商品进行分类,0-5000为冷门商品,5001-19999为一般商品,20000往上为热门商品,并求出不同类别商品的数量

结果如下:

Category(类型)   Cn(数量)

一般商品          1

冷门商品          10

热门商品          1

本题难点在于题目表述不清,容易造成理解不同

  (1)0-5000是销售额还是销售数量 ,以下统计数量

  (2)类别商品的数量 

是根据sku_id(商品)统计数量,还是根据sku_info表中字段category_id(商品类别)统计数量

以下统计商品数量而不是类别数量

分析

   表order_detail

  1. 统计商品销售件数

   Drop view  if exists v_1;

   Create view v_1 as select sku_id,sum(sku_num) s from order_detail group by sku_id;

Select  category,count(*) cn from

       (Select sku_id,

 case    when  s >=0 and s<=5000 then '冷门商品'

               when  s >=5001 and s<=19999 then '一般商品'

               when  s >=20000 then '热门商品'

       end  category

     from  v_1 )t1   group  by  category ;

   

参考代码

select

  t2.category,

  count(*) cn

from

  (

    select

      t1.sku_id,

      case

      when  t1.sku_sum >=0 and t1.sku_sum<=5000 then '冷门商品'

      when  t1.sku_sum >=5001 and t1.sku_sum<=19999 then '一般商品'

      when  t1.sku_sum >=20000 then '热门商品'

      end  category

    from

      (

        select

          sku_id,

          sum(sku_num)  sku_sum

        from

          order_detail

        group by

          sku_id

    )t1

)t2

group by

  t2.category

25.各品类销量前三的所有商品

题目需求:从订单详情表中(order_detail)和商品(sku_info)中查询各个品类销售数量前三商品。如果该品类小于三个商品,则输出所有的商品销量。

题义析:每个商品和,类别排名前三,且输出销量

Sku_id(商品id)Category_id(品类id)

2                    1

4                    1

1                    1

分析

表order_detail

表sku_info

(1)   求商品销量和 

    Drop view  if exists v_1;

    Create view v_1  as  select  sku_id,  sum(sku_num) sku_sum

                       from  order_detail   group by   sku_id;

(2)v_1 与表 sku_info左连,获得字段category_id

Drop view  if exists v_2;

Create view v_2  as  select t1.sku_id,t1.sku_sum,si.category_id  

from v_1 t1  join  sku_info si   on t1.sku_id=si.sku_id;

(3)根据销售额 sku_sum排序

   Drop view  if exists v_3;

Create view v_3  as  select sku_id,category_id,sku_sum,rank() over(partition by category_id  order by sku_sum desc)rk from v_2;

(4) 查询结果 

select stu_id,category_id,sku_sum  from v_3 where rk<=3;

代码:

select

  t2.sku_id,

  t2.category_id

from

  (

    select

      t1.sku_id,

      si.category_id,

    --商品类别排名

      rank()over(partition by category_id order by t1.sku_sum desc) rk

    from

      (  select

          sku_id,

          sum(sku_num) sku_sum

        from  order_detail

        group by   sku_id

    )t1

    join

      sku_info si

    on

      t1.sku_id=si.sku_id

    )t2

where   t2.rk<=3;

26.各品类中商品价格的中位数

题目需求:从商品信息表sku_info中求各分类商品价格的中位数。如果一个分类下商品个数是偶数则输出中间两个值的平均值,如果是奇数,则输出中间数即可。

分析

(1)按category_id分组,计算分组记录数cn,各记录在排名。

  Drop view if exists v_1;

Create view v_1 as  select sku_id, category_id, price,

                row_number() over (partition by category_id order by price desc) rn,

                count(*) over (partition by category_id)   cn,

                count(*) over (partition by category_id) % 2  flag

         from sku_info;

(2)计算分组记录数为偶数价格中数偶数中值

select distinct category_id, avg(price) over (partition by category_id) medprice

From v_1  where flag = 0  and (rn = cn / 2 or rn = cn / 2 + 1);

注意: where flag = 0  and (rn = cn / 2 or rn = cn / 2 + 1) 按条件取出两条记录

(3)计算分组记录数为奇数的价格中数

   select  category_id, price  medprice

From v_1  where flag = 1  and  rn = round(cn / 2) ;  四舍五入,rn一定是中间数

  1. 两种计算进行union

完整代码:

select distinct category_id, avg(price) over (partition by category_id) medprice

from (        select sku_id, category_id, price,

                row_number() over (partition by category_id order by price desc) rn,

                count(*) over (partition by category_id)        cn,

                count(*) over (partition by category_id) % 2   flag

         from sku_info

     ) t1

where flag = 0 and (rn = cn / 2 or rn = cn / 2 + 1)

union

select category_id, price

from (         select sku_id, category_id,  price,

                row_number() over (partition by category_id order by price desc) rn,

                count(*) over (partition by category_id)         cn,

                count(*) over (partition by category_id) % 2         flag

         from sku_info

     ) t1

where flag = 1  and rn = round(cn / 2);

27. 找出销售额连续3天超过100的商品

题目需求:从订单详情表(order_detail)中找出销售额连续3天超过100的商品

结果如下:

Sku_id(商品id)

1

10

分析

(1)统计商品日销售额>100

Drop view if exists v_1;

     Create view v_1 as select sku_id,create_date, sum(sku_num*price) day_sum  from order_detail  group by sku_id,create_date  having(sum(sku_num*price))>100;

(2)确定连续天数

   Drop view if exists v_2;

     Create view v_2 as  select sku_id,create_date, day_sum,

                Date_sub(create_date, Rank() over(partition by sku_id order by create_date)) rk

                From v_1;

(3)查询

  Select sku_id,  from v_2  group by sku_id, rk having count(*)>=3;

28 查询有新注册用户的当天的新用户数量、新用户的第一天留存率

 题目需求:从用户登录明细表(user_login_detail)中首次登录算作当天新增,第二天也登录了算作一日留存

结果如下:

 分析:

  1. 每天登录客户 去重

  Drop view if exists v_1;

  Create view v_1 as select user_id,date_format(login_ts,'yyyy-MM-dd') login_date

from user_login_detail  group by user_id,date_format(login_ts,'yyyy-MM-dd')

(2) 取下一条记录的登录日期lead_date,并排序 rk

 Drop view if exists v_2;

  Create view v_2 as      

           SELECT user_id,login_date,

            lead(login_date)over(partition by user_id order by login_date) lead_date,

            rank()over(partition by user_id order by login_date) rk

From  v_1

 lead(login_date) over(partition by user_id order by login_date)或

 lead(login_date,1) over(partition by user_id order by login_date)

  取下一条记录字段login_date

  1. 统计首次注册人数 registe

SELECT login_date ,

       count(*) register,

       cast(sum(if(datediff(lead_date,login_date)=1,1,0))/count(*) as decimal(16,2)) retention

From v_2  where rk=1  group by login_date

注意:where rk=1  首次注册

if(datediff(lead_date,login_date)=1,1,0)   lead_date,login_date日期相减=1,说明前后天连续登录用户

 If()=1 否则=0

  Sum () 求连续登录用户之和           

代码:

SELECT login_date

       count(*) register,

       cast(sum(if(datediff(lead_date,login_date)=1,1,0))/count(*) as decimal(16,2)) retention

from(  SELECT user_id,login_date,

       lead(login_date)over(partition by user_id order by login_date) lead_date,

       rank()over(partition by user_id order by login_date) rk

from( select user_id,date_format(login_ts,'yyyy-MM-dd') login_date

from user_login_detail  group by user_id,date_format(login_ts,'yyyy-MM-dd')

  )t1

  )t2

where rk=1   group by login_date

29 求出商品连续售卖的时间区间

题目需求:从订单详情表(order_detail)中,求出商品连续售卖的时间区间

结果如下(截取部分):

Sku_id(商品id)Start_date(起始时间)End_date(结束时间)

1    2021-09-27            2021-09-27

1     2021-09-30           2021-10-01

1     2021-10-03           2021-10-08

10    2021-10-02           2021-10-03

10    2021-10-05           2021-10-08

11    2021-10-02          2021-10-08

12    2021-09-30          2021-09-30

12    2021-10-02          2021-10-06

分析

  1. 找出dd2

按user_id分组,组内按create_date排序kr

 dd2=create-rk   组内如果dd2相等,则是日期是连续的。

   Drop view if exists v_1;

    Create view v_1  as select

sku_id, create_date,dense_rank()over(partition by sku_id order by create_date) rk,

(date_sub(create_date , dense_rank()over(partition by sku_id order by create_date))) as dd2

from order_detail

 

  1. 根据dd2由于此分组日期连接,求每组最大时间最小时间

    Select sku_id,

min(create_date)  as start_date,

max(create_date)  as end_date

from  v_1 group by sku_id,dd2

代码

select

A.sku_id,

min(A.create_date)  as start_date,

max(A.create_date)  as end_date

from

(select

sku_id, create_date,

(date_sub(create_date , dense_rank()over(partition by sku_id order by create_date))) as dd2

from order_detail )A

group by A.sku_id,A.dd2

30. 登录次数及交易次数统计

题目需求:分别从登录明细表(user_login_detail)和配送信息表(delivery_info)中,根据用户登录时间和下单时间统计每个用户在每一天的登录次数和交易次数。

 期望结果如下:

需要用到的表:

用户登录明细表:user_login_detail

配送信息表:delivery_info

分析

1统计每个 user_id 在 每一天的登录次数

  Drop view if exists v_1;

Create view v_1 as  select user_id, date_format(login_ts, 'yyyy-MM-dd') as login_date, count(*)login_count

         from user_login_detail

         group by user_id, date_format(login_ts, 'yyyy-MM-dd');

2) 统计每个 user_id 在每一天的交易(下单)次数

   Drop view if exists v_2;

Create view v_2 as   select user_id,  order_date,  count(*) as order_count

                      from delivery_info   group by user_id, order_date;

(3)两个表连接得到结果

  select t1.user_id   as user_id,t1.login_date login_date, t1.login_count  login_count,

       nvl(t2.order_count, 0) as order_count

From v_1 t1

Join v_2 t2   on t1.user_id = t2.user_id and t1.login_date = t2.order_date;

代码:

select t1.user_id             as user_id,

       t1.login_date          as login_date,

       t1.login_count         as login_count,

       nvl(t2.order_count, 0) as order_count

from (

         -- 1) 统计每个 user_id 在 每一天的 登录次数

         select user_id,

                date_format(login_ts, 'yyyy-MM-dd') as login_date,

                count(*)      as login_count

         from user_login_detail

         group by user_id, date_format(login_ts, 'yyyy-MM-dd')

     ) t1

         left join

     (

         -- 2) 统计每个 user_id 在 每一天的 交易(下单)次数

         select user_id,

                order_date,

                count(*) as order_count

         from delivery_info

         group by user_id, order_date

     ) t2

     on t1.user_id = t2.user_id and t1.login_date = t2.order_date;

31.按年度列出每个商品销售总额

题目需求:从订单明细表(order_detail)中列出每个商品每个年度的购买总额

分析

订单明细表:order_detail

从订单明细表(order_detail)中列出每个商品每个年度的购买总额

结果如下(截取部分):

select  sku_id,  year(create_date) year_date,  sum(price*sku_num) sku_sum

from  order_detail   group by   sku_id,year(create_date)

32. 某周内每件商品每天销售情况

 题目需求:从订单详情表(order_detail)中查询2021年9月27号-2021年10月3号这一周所有商品每天销售情况。

需要用到的表:

订单明细表:order_detail

结果如下:

select  sku_id,

  sum(if(dayofweek(create_date)=2,sku_num,0)) Monday,

  sum(if(dayofweek(create_date)=3,sku_num,0)) Tuesday,

  sum(if(dayofweek(create_date)=4,sku_num,0)) Wednesday,

  sum(if(dayofweek(create_date)=5,sku_num,0)) Thursday,

  sum(if(dayofweek(create_date)=6,sku_num,0)) Friday,

  sum(if(dayofweek(create_date)=7,sku_num,0)) Saturday,

  sum(if(dayofweek(create_date)=1,sku_num,0)) Sunday

from  order_detail   where

  create_date>='2021-09-27' and create_date<='2021-10-03'

group by   sku_id

注意: dayof week(create_date) 取日期周次 (1-7)

1  周日

2   周1

3   周2

4   周3

7   周六

33. 查看每件商品的售价涨幅情况

题目需求:从商品价格变更明细表(sku_price_modify_detail),得到最近一次价格的涨幅情况,并按照涨幅升序排序。

商品价格变更明细表:sku_price_modify_detail

 对每个商品按照修改日期倒序排序 并求出差值

Drop view if exists v_1;

Create view v_1 as select

  sku_id,  new_price-lead(new_price,1,0)  over(partition by sku_id order by change_date desc) price_change,

  rank()  over(partition by sku_id order by change_date desc) rk

From   sku_price_modify_detail

-- 最近一次修改的价格

Select  sku_id, price_change

From v_1   where   rk=1   order by price_change  

34. 销售订单首购和次购分析

 题目需求:通过商品信息表(sku_info)订单信息表(order_info)订单明细表(order_detail)分析如果有一个用户成功下单两个及两个以上的购买成功的手机订单(购买商品为xiaomi 10,apple 12,小米13)那么输出这个用户的id及第一次成功购买手机的日期和第二次成功购买手机的日期,以及购买手机成功的次数。

结果如下:

分析:用户购买二种或三种以上品牌产品 ,输出第一次和第二次购买时间,网上看到答案是输出最一次和最后一次时间。我以为有两个问题

  1. 用户购买两次或以上产品,网上没作判断,对于本题数不影响结果,因为所有用户都购买了两个或以上产品。
  2. 题目要求显示第一次和第二次购买时间,而网上给出的是第一次和最后一次时间。

需要用到的表

订单信息表:order_info

订单明细表:order_detail

商品信息表:sku_info

  1. 找出用户下单指定三种品牌记录

Drop view if exists v_1;

Create view v_1 as select user_id,od.create_date,name from order_detail od

Left join  order_info oi   on od.order_id=oi.order_id

Left join sku_info si   on  od.sku_id=si.sku_id

Where name in('xiaomi 13','xiaomi 10','apple 12');

  1. 找出两个或全以上商品用户

Drop view if exists v_2;

Create view v_2 as select user_id,create_date,name ,cnt from (select * ,count(*) over(partition by user_id)cnt  from v_1)t where cnt>=2;

  1. 按用户分组按create_date排序

  Drop view if exists v_3;

Create view v_3 as SELECT  *,row_number() over(partition by user_id order by create_date )rk from v_2 ;

 

 Select user_id, min(if( rk=1,create_date,null))first_date, min(if( rk=2,create_date,null))secend_date cnt from v_3 where rk<=2 group by user_id;

  附网上代码:

  WITH tmp as(  SELECT user_id,    od.create_date,name

      from order_detail od

      LEFT JOIN sku_info si      on od.sku_id = si.sku_id

      LEFT JOIN order_info oi      on od.order_id = oi.order_id

     where name in ('xiaomi 13','xiaomi 10','apple 12')

)

SELECT   user_id,first_date,last_date,   count(1) cn

from(   SELECT     user_id,

          max(create_date) over(PARTITION by user_id) last_date,

          min(create_date) over(PARTITION by user_id) first_date

      from tmp

)t1 group by user_id,first_date,last_date

35.同期商品售卖分析表

题目需求 从订单明细表(order_detail)中。求出同一个商品在2020年和2021年中同一个月的售卖情况对比。

题意分析:将每个商品2021、2022每一月销售量情况反映如下

         

  1. 求商品月销售量

Drop view if exists v_1;

Create view v_1  as  SELECT sku_id, MONTH(create_date) m, YEAR(create_date) y, sku_num from order_detail )

Select sku_id,y,m,sku_num from v_1  order by  sku_id,y,m;

sku_id   y     m   sku_num

1     2020 10 2

1 2021 9 9

1 2021 9 2

1 2021 10 8

1 2021 10 4

1 2021 10 8

1 2021 10 3

1 2021 10 8

1 2021 10 5

1 2021 10 2

10 2020 10 37

10 2020 10 57

10 2021 10 37

10 2021 10 48

10 2021 10 47

10 2021 10 22

10 2021 10 24

10 2021 10 27

SELECT sku_id,  m month,  sum(IF(y=2020,sku_num,0)) 2020_skusum,

                          sum(IF(y=2021,sku_num,0)) 2021_skusum

from v_1    group by sku_id,m

sku_idmonth 2020_skusum  2021_skusum

36 国庆期间每个品类的商品的收藏量和购买量

题目需求:从订单明细表(order_detail)和收藏信息表(favor_info)统计2021国庆期间,每个商品总收藏量和购买量

分析:

1)国庆期间每个品类的商品的收藏量

    Drop view if exists v_1;     

Create view v_1 as select sku_id, count(*) as favor_cn_tmp  from favor_info where '2021-10-01' <= create_date and create_date <= '2021-10-07'  group by sku_id  ;

  1.   国庆期间每个品类的商品的购买量

       Drop view if exists v_2;     

Create view v_2  as   select sku_id,  sum(sku_num) as sku_sum_tmp  from order_detail

       where '2021-10-01' <= create_date  and create_date <= '2021-10-07'   group by sku_id;

37 统计活跃间隔对用户分级结果

题目需求  用户等级:忠实用户:近7天活跃且非新用户

新晋用户:近7天新增

沉睡用户:近7天未活跃但是在7天前活跃

流失用户:近30天未活跃但是在30天前活跃

假设今天是数据中所有日期的最大值,从用户登录明细表中的用户登录时间给各用户分级,求出各等级用户的人数

分析

(1)

  Drop view  if  exists  v_1;

  Create view v_1 as SELECT  user_id,

        max(date_format(logout_ts,"yyyy-MM-dd")) over() today, --全局窗口

        --按用户id分组的窗口,求出最后一次登录日期

        max(date_format(login_ts,"yyyy-MM-dd")) over(PARTITION by user_id) last_login,

        --按用户id分组的窗口,求出第一次登录日期

        min(date_format(login_ts,"yyyy-MM-dd")) over(PARTITION by user_id) first_login

    from user_login_detail

(2)

 Drop view  if  exists  v_2;

  Create view v_2 as  SELECT

        user_id,

        case when first_login < date_sub(today,7) AND last_login >= date_sub(today,7) then '忠实用户'

                 when first_login >= date_sub(today,7) then '新增用户'

                 when last_login < date_sub(today,7) then '沉睡用户'

                 when last_login < date_sub(today,30) then '流失用户'

                 END   level

from  v_1;

查询汉字出现???,如何解决?

38.连续签到领金币数

题目需求 用户每天签到可以领1金币,并可以累计签到天数,连续签到的第3、7天分别可以额外领2和6金币。

每连续签到7天重新累积签到天数。

从用户登录明细表中求出每个用户金币总数,并按照金币总数倒序排序

表user_login_detail

 (1)字段login_ts格式转换“yyyy-MM-dd”,且去除不需要的字段

  Drop view if exists v_1;

 Create view v_1 as select user_id, date_format(login_ts, "yyyy-MM-dd") ts

from user_login_detail

  1. 统计用户的连续登录区间

 Drop view if exists v_2;

 Create view v_2 as select user_id,

       ts,

       row_number() over (partition by user_id order by ts)  rn,

       date_sub(ts, row_number() over (partition by user_id order by ts)) flag

From v_1

  1. 按照用户的id和flag进行分组,每个用户的不同登录区间找出来

ts表示登录的日期,rn是用了开窗排序,按照登录日期来排序,如果flag字段相等的话,就说明在这段时间是连续登录的。

表中101用户,登录区间是断开的,再次用row_number开个窗,按照用户的id和flag进行分组我们要怎么把1,2,3,4,5拆成1,2,3,4和1。

 Drop view if exists v_3;

 Create view v_3 as  select user_id,ts,

row_number() over (partition by user_id, flag order by ts) cnt

from  v_2;

   

  1. 对第三天登录和第七天登录的金币作标记3和7,其余的天数登录都是1。

 Drop view if exists v_4;

 Create view v_4 as select user_id,

       case

           when (cnt - 3) = 0  or (cnt - 3) % 7 = 0 then 3

           when (cnt % 7) = 0 then 7

           else 1 end glods

From v_3;

   Select * from v_4;

     

  1. 简单的分id做sum聚合

     select user_id, sum(glods) num  from v_4 group by user_id;

         

39.国庆期间的7日动销率和滞销率

题目需求:动销率定义为品类商品中一段时间内有销量的商品占当前已上架总商品数比例(有销量的商品/已上架总商品数)。

滞销率定义为品类商品中一段时间内没有销量的商品占当前已上架总商品数的比例。(没有销量的商品/ 已上架总商品数)。

只要当天任一店铺有任何商品的销量就输出该天的结果

从订单明细表(order_detail)确定定单时间(create_date)

商品信息表(sku_info)确定上架品类(category_id)品名name

  1. 生成字段为category_id,create_date,name国庆记录

Drop view if exists v_1;

 Create view v_1  as  select  distinct  si.category_id, od.create_date, si.name  from  order_detail od

          join  sku_info si  on  od.sku_id=si.sku_id

          where  od.create_date>='2021-10-01' and od.create_date<='2021-10-07'

  1. 统计每天品类数量

 Drop view if exists v_2;

 Create view v_2  as select

     t1.category_id,

     sum(if(t1.create_date='2021-10-01',1,0))  day_1,

     sum(if(t1.create_date='2021-10-02',1,0))  day-2,

     sum(if(t1.create_date='2021-10-03',1,0))  day-3,

     sum(if(t1.create_date='2021-10-04',1,0))  day-4,

     sum(if(t1.create_date='2021-10-05',1,0))  day-5,

     sum(if(t1.create_date='2021-10-06',1,0))  day-6,

     sum(if(t1.create_date='2021-10-07',1,0))  day-7 

  From v_1 t1   group by  t1.category_id;

(3)

 -- 每一天的动销率 和 滞销率

select

  t2.category_id,

  t2.day_1/t3.cn,  1-t2.day_1/t3.cn,

  t2.day_2/t3.cn,  1-t2.day_2/t3.cn,

  t2.day_3/t3.cn,  1-t2.day_3/t3.cn,

  t2.day_4/t3.cn,  1-t2.day_4/t3.cn,

  t2.day_5/t3.cn,  1-t2.day_5/t3.cn,

  t2.day_6/t3.cn,  1-t2.day_6/t3.cn,

  t2.day_7/t3.cn,  1-t2.day_7/t3.cn

From  v_2  t2

join  ( select  category_id, count(*) cn  from  sku_info  group by  category_id )t3

on   t2.category_id=t3.category_id

注意:  select  category_id, count(*) cn  from  sku_info  group by  category_id 统计上架整个品类数量

代码:

-- 每一天的动销率 和 滞销率

select

  t2.category_id,

  t2.day_1/t3.cn,  1-t2.day_1/t3.cn,

  t2.day_2/t3.cn,  1-t2.day_2/t3.cn,

  t2.day_3/t3.cn,  1-t2.day_3/t3.cn,

  t2.day_4/t3.cn,  1-t2.day_4/t3.cn,

  t2.day_5/t3.cn,  1-t2.day_5/t3.cn,

  t2.day_6/t3.cn,  1-t2.day_6/t3.cn,

  t2.day_7/t3.cn,  1-t2.day_7/t3.cn

from

  (  select

     t1.category_id,

     sum(if(t1.create_date='2021-10-01',1,0)) day_1,

     sum(if(t1.create_date='2021-10-02',1,0)) day_2,

     sum(if(t1.create_date='2021-10-03',1,0)) day_3,

     sum(if(t1.create_date='2021-10-04',1,0)) day_4,

     sum(if(t1.create_date='2021-10-05',1,0)) day_5,

     sum(if(t1.create_date='2021-10-06',1,0)) day_6,

     sum(if(t1.create_date='2021-10-07',1,0)) day_7

   from ( select  distinct  si.category_id, od.create_date, si.name  from  order_detail od

          join  sku_info si  on  od.sku_id=si.sku_id

          where  od.create_date>='2021-10-01' and od.create_date<='2021-10-07' )t1

       group by  t1.category_id

   )t2

join  ( select  category_id, count(*) cn  from  sku_info  group by  category_id )t3

on  t2.category_id=t3.category_id

40 同时在线最多的人数

 题目需求:根据用户登录明细表(user_login_detail),求出平台同时在线最多的人数。

分析:

(1)确定用户在线还是离线,tag=1,在线 tag =-1 离线

Drop view if exists v_1;

Create view v_1 as SELECT

      login_ts ts, 1 tag   FROM  user_login_detail

    UNION ALL

SELECT    logout_ts ts,   -1 tag    FROM    user_login_detail

注意: (1)两个数据集合并 需要相同字段

       (2) union 合并并去重,union all 合并不去重

       

  1. 计算tag和。

表中一个用户要么 tag=1 (上线)

            要么 tag=1,tag=-1(先上线,后下线)

所以统计 tag和,就可以确定线上人数。  

Drop view if exists v_2;

Create view v_2 as select sum(tag) over(order by ts) cn

FROM v_1;

注意:sum(tag) over(order by ts) cn

      求和范围是从开始到日前时刻ts时tag和。

注意:求和范围

 ts          tag  cn

2021-09-21 08:00:00 1      1

2021-09-22 09:00:00 1      2

2021-09-23 10:00:00 1      3

2021-09-24 11:00:00 1      4

2021-09-25 12:00:00 1      5

2021-09-26 13:00:00 1      6

2021-09-27 08:00:00 1      7

2021-09-27 08:30:00 -1     5

2021-09-27 08:30:00 -1     5

2021-09-27 09:30:00 -1     4

2021-09-27 10:30:00 -1     3

2021-09-27 11:30:00 -1     2

2021-09-27 12:30:00 -1     1

2021-09-27 13:30:00 -1     0

2021-09-27 14:00:00 1      1

2021-09-27 14:30:00 -1     0

2021-09-28 09:00:00 1     

2021-09-28 09:10:00 -1

2021-09-29 13:30:00 1

2021-09-29 13:50:00 -1

2021-09-30 20:00:00 1

2021-09-30 20:10:00 -1

2021-10-01 07:50:00 1

2021-10-01 08:00:00 1

2021-10-01 08:20:00 -1

2021-10-01 08:30:00 -1

2021-10-02 08:00:00 1

2021-10-02 08:30:00 -1

2021-10-03 07:50:00 1

2021-10-03 07:50:00 1

2021-10-03 08:20:00 -1

2021-10-03 08:40:00 1

2021-10-03 08:50:00 1

2021-10-03 09:20:00 -1

2021-10-03 10:20:00 -1

2021-10-03 10:30:00 -1

2021-10-04 08:40:00 1

2021-10-04 09:10:00 1

2021-10-04 09:30:00 -1

2021-10-04 10:30:00 -1

2021-10-05 21:50:00 1

2021-10-05 22:00:00 1

2021-10-05 22:40:00 -1

2021-10-05 23:00:00 -1

2021-10-06 08:50:00 1

2021-10-06 09:00:00 1

2021-10-06 09:00:00 1

2021-10-06 09:10:00 1

2021-10-06 10:00:00 -1

2021-10-06 10:00:00 -1

2021-10-06 10:20:00 -1

2021-10-06 10:20:00 -1

2021-10-06 22:00:00 1

2021-10-06 23:00:00 -1

2021-10-08 09:00:00 1

2021-10-08 09:10:00 -1

2021-10-09 08:50:00 1

2021-10-09 10:20:00 -1

(3) 统计线最大线上人数

Select  max(cn) cn from v_2;

代码:

-- 登录标记1 下线标记-1

select

  login_ts l_time,

  1 flag

from

  user_login_detail

union

select

  logout_ts l_time,

  -1 flag

from

  user_login_detail

-- 按照时间求和

select

  sum(flag)over(order by t1.l_time) sum_l_time

from

  (

    select

      login_ts l_time,

      1 flag

    from

      user_login_detail

    union

    select

      logout_ts l_time,

      -1 flag

    from

      user_login_detail

)t1  

-- 拿到最大值 就是同时在线最多人数

select

  max(sum_l_time)

from

  (

    select

      sum(flag)over(order by t1.l_time) sum_l_time

    from

      (

        select

          login_ts l_time,

          1 flag

        from

          user_login_detail

        union

        select

          logout_ts l_time,

          -1 flag

        from

          user_login_detail

    )t1

)t2

  • 11
    点赞
  • 28
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值