重叠问题-分组重叠

  • 1. 初始化数据
  • 1. 初始化数据
  • 2. 每组会话开始时间
  • 3. 每组会话结束时间
  • 4. 合并,最终sql如下
  • 5. 优化sql,视图处理
  • 6. 优化效率,自增长列
  • 7. 优化效率,临时表,没怎么看

 

1. 初始化数据

# 建立session表
DROP TABLE IF EXISTS session;
CREATE TABLE sessions
(
    id         INT         NOT NULL AUTO_INCREMENT,
    app         VARCHAR(10)     NOT NULL,
    usr         VARCHAR(10)     NOT NULL,
    starttime     TIME         NOT NULL,
    endtime     TIME         NOT NULL,
    PRIMARY KEY(id)
);

DELETE FROM sessions;
# 插入数据
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr1', '08:30', '10:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr2', '08:30', '08:35');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr1', '09:00', '09:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr2', '09:15', '10:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr1', '09:15', '09:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr2', '10:30', '14:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr1', '10:45', '11:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr2', '11:00', '12:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr1', '08:30', '08:45');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr2', '09:00', '09:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr1', '11:45', '12:00');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr2', '12:30', '14:00');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr1', '12:45', '13:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr2', '13:00', '14:00');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr1', '14:00', '16:30');
INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr2', '15:30', '17:00');

# 创建索引
CREATE UNIQUE INDEX idx_app_usr_s_e_key
    ON sessions(app, usr, starttime, endtime, id);
CREATE INDEX idx_app_s_e ON sessions(app, starttime, endtime)

 

 

2. 每组会话开始时间

  服务提供商可能允许多个session的连接,并把其计费统计为1次,这就是所谓的分组重叠。对于上面的例子,应该把app1、user1在08:30~10:30间的3次会话算为一次会话。
我们分步骤来讨论这个问题,先求出每个会话组开始时间,并用DISTINCT返回不重复的开始时间,具体过程如下:

SELECT DISTINCT app, usr, starttime AS a
FROM sessions AS a
WHERE NOT EXISTS(
    SELECT *
    FROM sessions AS b
    WHERE a.app = b.app AND a.usr = b.usr
    AND a.starttime > b.starttime AND a.starttime <= b.endtime
)

 

 

3. 每组会话结束时间

SELECT DISTINCT app, usr, endtime AS a
FROM sessions AS a
WHERE NOT EXISTS(
    SELECT * 
    FROM sessions AS b
    WHERE a.app=b.app AND a.usr=b.usr
    AND a.endtime>=b.starttime AND a.endtime<b.endtime
)

 

 

4. 合并,最终sql如下:

SELECT DISTINCT s.app, s.usr, s.s,
    (
        SELECT MIN(e)
        FROM (
            SELECT DISTINCT app, usr, endtime AS e
            FROM sessions AS a
            WHERE NOT EXISTS(
                SELECT * 
                FROM sessions AS b
                WHERE a.app=b.app AND a.usr=b.usr
                AND a.endtime>=b.starttime AND a.endtime<b.endtime
            )
        ) AS s2
        WHERE s2.e>s.s AND s.app=s2.app AND s.usr=s2.usr
    ) AS e
FROM
(
    SELECT DISTINCT app, usr, starttime AS s
    FROM sessions AS a
    WHERE NOT EXISTS(
        SELECT *
        FROM sessions AS b
        WHERE a.app = b.app AND a.usr = b.usr
        AND a.starttime > b.starttime AND a.starttime <= b.endtime
    )
) AS s,

(
    SELECT DISTINCT app, usr, endtime AS e
    FROM sessions AS a
    WHERE NOT EXISTS(
        SELECT * 
        FROM sessions AS b
        WHERE a.app=b.app AND a.usr=b.usr
        AND a.endtime>=b.starttime AND a.endtime<b.endtime
    )
) AS e
WHERE s.app = e.app AND s.usr=e.usr

 

5. 优化sql,视图处理

  以上代码看起来非常复杂,因为需要使用多个子查询,实际上可以通过创建视图来行简化上述的SQL查询。可以进行这样的操作,将上述两个会话组开始和结束的临时表定义为视图,然后再进一步操作,如下:

# 开始
CREATE     VIEW v_s AS 
SELECT DISTINCT app, usr, starttime AS s
    FROM sessions AS a
    WHERE NOT EXISTS(
        SELECT *
        FROM sessions AS b
        WHERE a.app = b.app AND a.usr = b.usr
        AND a.starttime > b.starttime AND a.starttime <= b.endtime
    );

# 结束
CREATE VIEW v_e AS
SELECT DISTINCT app, usr, endtime AS e
    FROM sessions AS a
    WHERE NOT EXISTS(
        SELECT * 
        FROM sessions AS b
        WHERE a.app=b.app AND a.usr=b.usr
        AND a.endtime>=b.starttime AND a.endtime<b.endtime
    );
    

# 查询    
SELECT DISTINCT s.app, s.usr, s.s,
    (
        SELECT MIN(e)
        FROM v_e AS s2
        WHERE s2.e>s.s AND s.app=s2.app AND s.usr=s2.usr
    ) AS e
FROM
v_s AS s,
v_e AS e
WHERE s.app = e.app AND s.usr=e.usr;

 

6. 优化效率,自增长列

  用视图作为派生表有一个缺点,那就是如果在一个查询中需要反复查询一个视图,那么这张视图需要被计算多次,如图,有这些东西应该是引起重视:

 

  可以看到v_e这张视图被计算了两次,如果表中的记录非常大,那么需要提高视图的执行效率。怎么才能避免一个视图被查询多次呢?有一种方法是使用临时表,将v_s、v_e两张视图作为临时表。如果使用了临时表,那么将不再需要MIN函数的这个子查询,因为我们可以对临时表增加一个自增长的列,然后进行匹配即可。存储过程如下所示:

SET @A=0;
SET @B=0;
SELECT DISTINCT  xx.app, xx.usr, s, e
FROM
(
    SELECT @B:=@B+1 AS id, app, usr, s 
        FROM (
            SELECT DISTINCT app, usr, starttime AS s
            FROM sessions AS a
            WHERE NOT EXISTS(
                SELECT *
                FROM sessions AS b
                WHERE a.app = b.app AND a.usr = b.usr
                AND a.starttime > b.starttime AND a.starttime <= b.endtime
            )
        ) AS p
) AS xx
,
(
    SELECT @A:=@A+1 AS id, app, usr, e
    FROM (
        SELECT DISTINCT app, usr, endtime AS e
        FROM sessions AS a
        WHERE NOT EXISTS(
            SELECT * 
            FROM sessions AS b
            WHERE a.app=b.app AND a.usr=b.usr
            AND a.endtime>=b.starttime AND a.endtime<b.endtime
        )
    ) AS o
) yy
WHERE xx.id=xx.id AND xx.app = yy.app AND xx.usr=xx.usr  

 

看图,3个减少到2个了,

 

7. 优化效率,临时表,没怎么看

CREATE PROCEDURE pGroupOverlap()
BEGIN
  DROP TABLE IF EXISTS $s;
  DROP TABLE IF EXISTS $e;
  
  CREATE TEMPORARY TABLE $s(
    id INT AUTO_INCREMENT,
    app VARCHAR(10),
    usr VARCHAR(10),
    starttime TIME,
    PRIMARY KEY(id),
    KEY(usr, app)
  )ENGINE=MEMORY;
  
  CREATE TEMPORARY TABLE $e(
    id INT AUTO_INCREMENT,
    app VARCHAR(10),
    usr VARCHAR(10),
    endtime TIME,
    PRIMARY KEY(id),
    KEY(usr, app)
  )ENGINE=MEMORY;
  
  INSERT INTO $s(app, usr, starttime)(
    SELECT DISTINCT app, usr, starttime AS s
    FROM sessions AS a
    WHERE NOT EXISTS(
        SELECT *
        FROM sessions AS b
        WHERE a.app = b.app AND a.usr = b.usr
        AND a.starttime > b.starttime AND a.starttime <= b.endtime
    )
  );
  
  INSERT INTO $e(app, usr, endtime)(
    SELECT DISTINCT app, usr, endtime AS e
    FROM sessions AS a
    WHERE NOT EXISTS(
        SELECT * 
        FROM sessions AS b
        WHERE a.app=b.app AND a.usr=b.usr
        AND a.endtime>=b.starttime AND a.endtime<b.endtime
    )
  );
END

-- 查询
SELECT s.app, s.usr, starttime, endtime
FROM $s AS s, $e AS e
WHERE s.app=e.app AND s.usr=e.usr AND s.id=e.id;

 

 

7. 优化效率,临时表,没怎么看

CREATE PROCEDURE pGroupOverlap()
BEGIN
  DROP TABLE IF EXISTS $s;
  DROP TABLE IF EXISTS $e;
  
  CREATE TEMPORARY TABLE $s(
    id INT AUTO_INCREMENT,
    app VARCHAR(10),
    usr VARCHAR(10),
    starttime TIME,
    PRIMARY KEY(id),
    KEY(usr, app)
  )ENGINE=MEMORY;
  
  CREATE TEMPORARY TABLE $e(
    id INT AUTO_INCREMENT,
    app VARCHAR(10),
    usr VARCHAR(10),
    endtime TIME,
    PRIMARY KEY(id),
    KEY(usr, app)
  )ENGINE=MEMORY;
  
  INSERT INTO $s(app, usr, starttime)(
    SELECT DISTINCT app, usr, starttime AS s
    FROM sessions AS a
    WHERE NOT EXISTS(
        SELECT *
        FROM sessions AS b
        WHERE a.app = b.app AND a.usr = b.usr
        AND a.starttime > b.starttime AND a.starttime <= b.endtime
    )
  );
  
  INSERT INTO $e(app, usr, endtime)(
    SELECT DISTINCT app, usr, endtime AS e
    FROM sessions AS a
    WHERE NOT EXISTS(
        SELECT * 
        FROM sessions AS b
        WHERE a.app=b.app AND a.usr=b.usr
        AND a.endtime>=b.starttime AND a.endtime<b.endtime
    )
  );
END

-- 查询
SELECT s.app, s.usr, starttime, endtime
FROM $s AS s, $e AS e
WHERE s.app=e.app AND s.usr=e.usr AND s.id=e.id;

 

转载于:https://www.cnblogs.com/frank-quan/p/5807627.html

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值