20190510更新:注意一下join的形式
SELECT SUM(number)
FROM
(
SELECT v_id,COUNT(DISTINCT s_id) AS number
FROM(
(SELECT p.v_id,p.s_id,p.ts
FROM page_view p,dim_page d
WHERE d.page_id = p.page_id
AND page_name = '广告页A') AS t1
LEFT JOIN
(SELECT p.v_id,p.s_id,p.ts
FROM page_view p,dim_page d
WHERE d.page_id = p.page_id
AND page_name = '产品页B') AS t2
ON t1.v_id = t2.v_id AND t1.s_id = t2.s_id
LEFT JOIN
(SELECT p.v_id,p.s_id,p.ts
FROM page_view p,dim_page d
WHERE d.page_id = p.page_id
AND page_name = '预定页C') AS t3
ON t1.v_id = t3.v_id AND t1.v_id = t3.s_id -- 要保证t1t2t3的vid和sid都一样
-- (s_id一样代表跳转,vid一样代表连接同一用户)
WHERE t1.ts < t2.ts
AND t2.ts < t3.ts
GROUP BY t1.v_id,t1.s_id)AS total -- 这里group形成了总表合并满足条件的记录(比如a/M,a/N,b/T,b/s)
-- a,b代表v_id,MNTS代表s_id
GROUP BY v_id -- groupv_id之后就能知道每个v_id有几个MNTS也就是满足条件的路径了
);
-- 这里我认为当v_id一样是,seesionid一样的组成一条路径!!!!
DROP TABLE page_view;
CREATE TABLE page_view
(
vid VARCHAR(50) NOT NULL,
sid BIGINT(20) NOT NULL,
ts BIGINT(20) NOT NULL,
page_id INT(11) NOT NULL,
PRIMARY KEY (vid,sid,ts)
) ;
CREATE TABLE dim_page
(
page_id INT(11) NOT NULL,
page_name VARCHAR(25) NOT NULL,
PRIMARY KEY (page_id)
) ;
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',1,1,1);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',1,2,2);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',1,3,3);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',1,4,4);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',1,5,5);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',2,1,1);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',2,2,2);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',2,3,3);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',3,4,4);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('A',3,5,5);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',1,1,1);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',1,2,2);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',1,3,3);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',1,4,4);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',1,5,5);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',2,1,1);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',2,2,2);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',2,3,4);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',3,4,4);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('B',3,5,5);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',1,1,1);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',1,2,2);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',1,3,3);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',1,4,4);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',1,5,5);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',2,1,1);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',2,2,2);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',2,3,3);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',3,4,4);
INSERT INTO page_view(vid,sid,ts,page_id) VALUES ('C',3,5,5);
INSERT INTO dim_page(page_id,page_name) VALUES (1,'页面A');
INSERT INTO dim_page(page_id,page_name) VALUES (2,'页面B');
INSERT INTO dim_page(page_id,page_name) VALUES (3,'页面C');
INSERT INTO dim_page(page_id,page_name) VALUES (4,'页面D');
INSERT INTO dim_page(page_id,page_name) VALUES (5,'页面E');
SELECT *
FROM
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面A') AS t1
LEFT JOIN
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面B') AS t2
ON t1.vid=t2.vid AND t1.sid = t2.sid AND t1.ts < t2.ts
LEFT JOIN
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面C') AS t3
ON t2.vid=t3.vid AND t2.sid = t3.sid AND t2.ts < t3.ts;
这样left join 的后果
改进一下
SELECT *
FROM
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面A') AS t1
JOIN
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面B') AS t2
ON t1.vid=t2.vid AND t1.sid = t2.sid AND t1.ts < t2.ts
JOIN
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面C') AS t3
ON t2.vid=t3.vid AND t2.sid = t3.sid AND t2.ts < t3.ts;
看到了吗join是显示列合并就是列的行数变多!!!(不同于 union
而且要仔细思考会不会有一对多的情况,就是思考join之后会变成什么呢)
最后结果
SELECT COUNT(*)
FROM
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面A') AS t1
JOIN
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面B') AS t2
ON t1.vid=t2.vid AND t1.sid = t2.sid AND t1.ts < t2.ts
JOIN
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面C') AS t3
ON t2.vid=t3.vid AND t2.sid = t3.sid AND t2.ts < t3.ts;
结果最后为5
注意我join的话前面一定要有from不然出现语法错误
但是我就想用left join怎么办??!!好办!把left join on and换成left join on where!!!
SELECT *
FROM
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面A') AS t1
LEFT JOIN
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面B') AS t2
ON t1.vid=t2.vid AND t1.sid = t2.sid
LEFT JOIN
(SELECT p.vid,p.sid,p.ts
FROM page_view p
JOIN dim_page d
ON d.page_id = p.page_id
AND page_name = '页面C') AS t3
ON t2.vid=t3.vid AND t2.sid = t3.sid
WHERE t1.ts < t2.ts
AND t2.ts < t3.ts;