故障006:连接排序去重结果不如人愿
DM技术交流QQ群:940124259
1. 问题描述
百无聊赖而又炎热的下午,同事发消息问我某个查询SQL返回的结果集不对头,单从逻辑上看SQL写得没问题。后来远程过去查看执行计划,让wo大吃一惊,好好的左连接可以自动转为内连接,可活脱脱被转成左外半连接,结果集不如人愿。考虑到现在大量人使用咱达梦数据库,可能有人会遇到这种情况,提前通知揭晓一下。
上图见真相:a表lnggwnh字段明明在where中指定过滤只保留2022年的内容,结果是把b表的lnggwzid字段的值再复制一份放到a表lnggwnh上,返回的字段类型也不对。
此次不但解决了实际问题,同时加快查询速度。
客户环境:1-1-134-20.09.30-127948-ENT 单机 标红,特地强调同志别再用这个版本啦。
错误的执行计划
1 #NSET2: [145, 40, 168]
2 #PRJT2: [145, 40, 168]; exp_num(3), is_atom(FALSE)
3 #SORT3: [145, 40, 168]; key_num(1), is_distinct(TRUE), top_flag(0), is_adaptive(0)
4 #HASH LEFT SEMI JOIN2: [138, 68261, 168]; KEY_NUM(1); KEY(b.LNGGWZID=a.LNGGWZID) KEY_NULL_EQU(0) -- ERROR
5 #SLCT2: [1, 40, 108]; b.LNGDWID = var1
6 #CSCN2: [1, 387, 108]; INDEX33563314(GW_GWZ as b)
7 #SLCT2: [94, 612647, 60]; a.LNGGWNH = var2
8 #CSCN2: [94, 741641, 60]; INDEX33563302(GW_WH as a)
2. 分析过程
2.1 去掉distinct
结论:左连接正确地转换为内连接,返回结果集除了重复行外,大体上结果集是正确的。
SELECT /* DISTINCT*/ a.lnggwnh,b.lnggwzid,b.chrgwzmc
FROM gw_wh a
LEFT JOIN gw_gwz b ON a.lnggwzid = b.lnggwzid
WHERE a.lnggwnh = 2022
AND b.lngdwid = 1374972990848155649
order by b.lnggwzid desc;
/*
1 #NSET2: [144, 68261, 168]
2 #PRJT2: [144, 68261, 168]; exp_num(3), is_atom(FALSE)
3 #SORT3: [144, 68261, 168]; key_num(1), is_distinct(FALSE), top_flag(0), is_adaptive(0)
4 #HASH2 INNER JOIN: [138, 68261, 168]; LKEY_UNIQUE KEY_NUM(1); KEY(b.LNGGWZID=a.LNGGWZID) KEY_NULL_EQU(0)
5 #SLCT2: [1, 40, 108]; b.LNGDWID = var1
6 #CSCN2: [1, 387, 108]; INDEX33563314(GW_GWZ as b)
7 #SLCT2: [94, 612647, 60]; a.LNGGWNH = var2
8 #CSCN2: [94, 741641, 60]; INDEX33563302(GW_WH as a)
*/
2.2 去掉order by
去掉order by,连接类型变得正常(即:内连接),说明此bug的触发是平级主体查询distinct + order by + 表连接。
SELECT DISTINCT a.lnggwnh,b.lnggwzid,b.chrgwzmc
FROM gw_wh a
LEFT JOIN gw_gwz b ON a.lnggwzid = b.lnggwzid
WHERE a.lnggwnh = 2022
AND b.lngdwid = 1374972990848155649
/* order by b.lnggwzid desc */;
-- 执行计划
/*
1 #NSET2: [144, 68261, 168]
2 #PRJT2: [144, 68261, 168]; exp_num(3), is_atom(FALSE)
3 #DISTINCT: [144, 68261, 168]
4 #HASH2 INNER JOIN: [138, 68261, 168]; LKEY_UNIQUE KEY_NUM(1); KEY(b.LNGGWZID=a.LNGGWZID) KEY_NULL_EQU(0)
5 #SLCT2: [1, 40, 108]; b.LNGDWID = var1
6 #CSCN2: [1, 387, 108]; INDEX33563314(GW_GWZ as b)
7 #SLCT2: [94, 612647, 60]; a.LNGGWNH = var2
8 #CSCN2: [94, 741641, 60]; INDEX33563302(GW_WH as a)
*/
2.3 改为索引连接
左连接正常转成内连接,但在此场景走嵌套循环索引连接的效率很低。
SELECT /*+ enable_hash_join(0)*/ a.lnggwnh,b.lnggwzid,b.chrgwzmc
FROM gw_wh a
LEFT JOIN gw_gwz b ON a.lnggwzid = b.lnggwzid
WHERE a.lnggwnh = 2022
AND b.lngdwid = 1374972990848155649
group by a.lnggwnh,b.lnggwzid,b.chrgwzmc
order by b.lnggwzid desc;
/*
1 #NSET2: [73762, 1, 168]
2 #PRJT2: [73762, 1, 168]; exp_num(3), is_atom(FALSE)
3 #SORT3: [73762, 1, 168]; key_num(2), is_distinct(TRUE), top_flag(0), is_adaptive(0)
4 #SLCT2: [73637, 612647, 168]; b.LNGDWID = var1
5 #NEST LOOP INDEX JOIN2: [73637, 612647, 168]
6 #SLCT2: [94, 612647, 60]; a.LNGGWNH = var2
7 #CSCN2: [94, 741641, 60]; INDEX33563302(GW_WH as a)
8 #BLKUP2: [4001, 1, 0]; INDEX33563315(b)
9 #SSEK2: [4001, 1, 0]; scan_type(ASC), INDEX33563315(GW_GWZ as b), scan_range[a.LNGGWZID,a.LNGGWZID]
*/
3. 解决方法
3.1 distinct换用group by去重
结论:结果集是正确的。是因为将排序与去重分开操纵,即先分组去重,再排序。
SELECT a.lnggwnh,b.lnggwzid,b.chrgwzmc
FROM gw_wh a
LEFT JOIN gw_gwz b ON a.lnggwzid = b.lnggwzid
WHERE a.lnggwnh = 2022
AND b.lngdwid = 1374972990848155649
group by a.lnggwnh,b.lnggwzid,b.chrgwzmc
order by b.lnggwzid desc;
-- 执行计划
/*
1 #NSET2: [145, 40, 168]
2 #PRJT2: [145, 40, 168]; exp_num(3), is_atom(FALSE)
3 #SORT3: [145, 40, 168]; key_num(1), is_distinct(FALSE), top_flag(0), is_adaptive(0)
4 #HAGR2: [144, 40, 168]; grp_num(2), sfun_num(0); slave_empty(0) keys(b.LNGGWZID, b.CHRGWZMC)
5 #HASH LEFT SEMI JOIN2: [138, 68261, 168]; KEY_NUM(1); KEY(b.LNGGWZID=a.LNGGWZID) KEY_NULL_EQU(0)
6 #SLCT2: [1, 40, 108]; b.LNGDWID = var2
7 #CSCN2: [1, 387, 108]; INDEX33563314(GW_GWZ as b)
8 #SLCT2: [94, 612647, 60]; a.LNGGWNH = var3
9 #CSCN2: [94, 741641, 60]; INDEX33563302(GW_WH as a)
*/
3.2 表子查询延迟排序
先让其正常变为内连接后去重,最后再排序。
select * from (
SELECT DISTINCT a.lnggwnh,b.lnggwzid,b.chrgwzmc
FROM gw_wh a
LEFT JOIN gw_gwz b ON a.lnggwzid = b.lnggwzid
WHERE a.lnggwnh = 2022
AND b.lngdwid = 1374972990848155649
) t
order by t.lnggwzid desc;
/*
1 #NSET2: [145, 80, 168]
2 #PRJT2: [145, 80, 168]; exp_num(3), is_atom(FALSE)
3 #SORT3: [145, 80, 168]; key_num(1), is_distinct(FALSE), top_flag(0), is_adaptive(0)
4 #PRJT2: [144, 80, 168]; exp_num(3), is_atom(FALSE)
5 #DISTINCT: [144, 80, 168]
6 #HASH2 INNER JOIN: [138, 68261, 168]; LKEY_UNIQUE KEY_NUM(1); KEY(b.LNGGWZID=a.LNGGWZID) KEY_NULL_EQU(0)
7 #SLCT2: [1, 40, 108]; b.LNGDWID = var1
8 #CSCN2: [1, 387, 108]; INDEX33563314(GW_GWZ as b)
9 #SLCT2: [94, 612647, 60]; a.LNGGWNH = var2
10 #CSCN2: [94, 741641, 60]; INDEX33563302(GW_WH as a)
*/
3.3 降低优化器版本
数据库全局生效:SP_SET_PARA_VALUE(1, ‘OPTIMIZER_VERSION’, 70081); – OLD
VALUE:70097 通常以最新版本
虽说属于会话级的参数,但需要重启数据库生效,影响所有会话连接,否则只影响当前会话的查询环境(hint或者alter session)。
SELECT /*+ OPTIMIZER_VERSION(70081)*/
DISTINCT
A.LNGGWNH,
B.LNGGWZID,
B.CHRGWZMC
FROM
GW_WH A
LEFT JOIN GW_GWZ B
ON
A.LNGGWZID = B.LNGGWZID
WHERE
A.LNGGWNH = 2022
AND B.LNGDWID = 1374972990848155649
ORDER BY
B.LNGGWZID DESC;
/*
1 #NSET2: [157, 40, 168]
2 #PRJT2: [157, 40, 168]; exp_num(3), is_atom(FALSE)
3 #SORT3: [157, 40, 168]; key_num(3), is_distinct(TRUE), top_flag(0), is_adaptive(0)
4 #HASH2 INNER JOIN: [138, 222244, 168]; LKEY_UNIQUE KEY_NUM(1); KEY(B.LNGGWZID=A.LNGGWZID) KEY_NULL_EQU(0)
5 #SLCT2: [1, 40, 108]; B.LNGDWID = var1
6 #CSCN2: [1, 381, 108]; INDEX33563314(GW_GWZ as B)
7 #SLCT2: [93, 611173, 60]; A.LNGGWNH = var2
8 #CSCN2: [93, 740368, 60]; INDEX33563302(GW_WH as A)
*/
3.4 修改优化器模式
SELECT /*+ OPTIMIZER_MODE(0)*/
DISTINCT
A.LNGGWNH,
B.LNGGWZID,
B.CHRGWZMC
FROM
GW_WH A
LEFT JOIN GW_GWZ B
ON
A.LNGGWZID = B.LNGGWZID
WHERE
A.LNGGWNH = 2022
AND B.LNGDWID = 1374972990848155649
ORDER BY
B.LNGGWZID DESC;
/*
1 #NSET2: [157, 40, 168]
2 #PRJT2: [157, 40, 168]; exp_num(3), is_atom(FALSE)
3 #SORT3: [157, 40, 168]; key_num(3), is_distinct(TRUE), top_flag(0), is_adaptive(0)
4 #HASH2 INNER JOIN: [138, 222244, 168]; LKEY_UNIQUE KEY_NUM(1); KEY(B.LNGGWZID=A.LNGGWZID) KEY_NULL_EQU(0)
5 #SLCT2: [1, 40, 108]; B.LNGDWID = var1
6 #CSCN2: [1, 381, 108]; INDEX33563314(GW_GWZ as B)
7 #SLCT2: [93, 611173, 60]; A.LNGGWNH = var2
8 #CSCN2: [93, 740368, 60]; INDEX33563302(GW_WH as A)
*/
3.5 升级稳定版本
在数据库版本1-1-144以后版本修复此bug,本人已验证,执行计划如3.3/3.4相同。 切记: 升级数据库版一定要慎重,尤其是线上生产库,已经运行很长时间。 如果随意升级,当时只是解决已知问题,而不能预见新问题,轻易升级可能导致未知大量错误,所以做任何修改操作前,一定要有回退方案和评估范围。
4. 案例再现
4.1 错误再现
测试版本:1-1-126-20.09.04-126608-ENT
模拟SQL
SELECT DISTINCT
a.ID id1,
B.ID id2,
B.NAME
FROM
sysobjects a
LEFT JOIN sysobjects b on 1=1
WHERE
a.id = 1
and b.TYPE$ = 'SCH'
ORDER BY
B.ID DESC;
-- 执行计划
/*
1 #NSET2: [1, 1, 104]
2 #PRJT2: [1, 1, 104]; exp_num(3), is_atom(FALSE)
3 #SORT3: [1, 1, 104]; key_num(2), is_distinct(TRUE), top_flag(0), is_adaptive(0) -- SORT + DISTINCT
4 #NEST LOOP SEMI JOIN2: [0, 34, 104];
5 #CSEK2: [0, 34, 100]; scan_type(ASC), SYSINDEXSYSOBJECTS(SYSOBJECTS as B), scan_range[('SCH',min,min),('SCH',max,max))
6 #SSEK2: [0, 1, 4]; scan_type(ASC), SYSINDEXIDSYSOBJECTS(SYSOBJECTS as A), scan_range[1,1]
*/
4.2 解决手段
-- 1. DISTINCT -> 2. SORT
select * from (
SELECT DISTINCT
a.ID id1,
B.ID id2,
B.NAME
FROM
sysobjects a
LEFT JOIN sysobjects b on 1=1
WHERE
a.id = 1
and b.TYPE$ = 'SCH'
) t
ORDER BY
t.ID2 DESC;
/*
1 #NSET2: [2, 1, 104]
2 #PRJT2: [2, 1, 104]; exp_num(3), is_atom(FALSE)
3 #SORT3: [2, 1, 104]; key_num(1), is_distinct(FALSE), top_flag(0), is_adaptive(0)
4 #PRJT2: [1, 1, 104]; exp_num(3), is_atom(FALSE)
5 #DISTINCT: [1, 1, 104]
6 #NEST LOOP INNER JOIN2: [0, 34, 104];
7 #SSEK2: [0, 1, 4]; scan_type(ASC), SYSINDEXIDSYSOBJECTS(SYSOBJECTS as A), scan_range[1,1]
8 #CSEK2: [0, 34, 100]; scan_type(ASC), SYSINDEXSYSOBJECTS(SYSOBJECTS as B), scan_range[('SCH',min,min),('SCH',max,max))
*/
-- INNER JOIN -> DISTINCT + SORT
SELECT /*+ optimizer_mode(0)*/ DISTINCT
a.ID ID1,
b.ID ID2,
b.NAME
FROM
sysobjects a
LEFT JOIN sysobjects b on 1=1
WHERE
a.id = 1
and b.TYPE$ = 'SCH'
ORDER BY
b.id DESC;
/*
1 #NSET2: [1, 1, 104]
2 #PRJT2: [1, 1, 104]; exp_num(3), is_atom(FALSE)
3 #SORT3: [1, 1, 104]; key_num(2), is_distinct(TRUE), top_flag(0), is_adaptive(0)
4 #NEST LOOP INNER JOIN2: [0, 34, 104];
5 #SSEK2: [0, 1, 4]; scan_type(ASC), SYSINDEXIDSYSOBJECTS(SYSOBJECTS as A), scan_range[1,1]
6 #CSEK2: [0, 34, 100]; scan_type(ASC), SYSINDEXSYSOBJECTS(SYSOBJECTS as B), scan_range[('SCH',min,min),('SCH',max,max))
*/
SELECT /*+ optimizer_version(70081)*/ DISTINCT
a.ID ID1,
b.ID ID2,
b.NAME
FROM
sysobjects a
LEFT JOIN sysobjects b on 1=1
WHERE
a.id = 1
and b.TYPE$ = 'SCH'
ORDER BY
b.id DESC;
/*
1 #NSET2: [1, 1, 104]
2 #PRJT2: [1, 1, 104]; exp_num(3), is_atom(FALSE)
3 #SORT3: [1, 1, 104]; key_num(3), is_distinct(TRUE), top_flag(0), is_adaptive(0)
4 #NEST LOOP INNER JOIN2: [0, 34, 104];
5 #SSEK2: [0, 1, 4]; scan_type(ASC), SYSINDEXIDSYSOBJECTS(SYSOBJECTS as A), scan_range[1,1]
6 #CSEK2: [0, 34, 100]; scan_type(ASC), SYSINDEXSYSOBJECTS(SYSOBJECTS as B), scan_range[('SCH',min,min),('SCH',max,max))
*/