[20181123]关于降序索引问题.txt
--//以前写的链接:http://blog.itpub.net/267265/viewspace-1159181/
--//降序索引实际上dump值的每个值与0xff异或,最后添加0xff.这样索引键值长度比普通索引长度+1.
--//NULL实际上对应0xff,这样异或后变成0x00.
--//有几个疑问.1.如果字符串长度已经是4000字符如何处理.2.如果插入chr(0),对应键值是0xff吗?
--//还是通过测试说明问题:
1.环境:
SCOTT@book> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
2.建立测试:
create table t (id number,name varchar2(4000));
insert into t values (1,'aaaaaa');
insert into t values (2,'bbbbbb');
insert into t values (3,chr(0));
insert into t values (4,lpad('c',4000,'c'));
commit ;
create index i_t_name on t(name desc);
SCOTT@book> select segment_name,header_file,header_block from dba_segments where owner=user and segment_name in ('I_T_NAME');
SEGMENT_NAME HEADER_FILE HEADER_BLOCK
-------------------- ----------- ------------
I_T_NAME 4 554
--//记录很少,仅仅只有一个索引根节点。
SCOTT@book> alter system dump datafile 4 block 555;
System altered.
3.检查转储:
Block header dump: 0x0100022b
Object id on Block? Y
seg/obj: 0x1610c csc: 0x03.376e0cf9 itc: 2 flg: E typ: 2 - INDEX
brn: 0 bdba: 0x1000228 ver: 0x01 opc: 0
inc: 0 exflg: 0
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0x0000.000.00000000 0x00000000.0000.00 ---- 0 fsc 0x0000.00000000
0x02 0xffff.000.00000000 0x00000000.0000.00 C--- 0 scn 0x0003.376e0cf9
Leaf block dump
===============
header address 140469718254180=0x7fc1a7a88264
kdxcolev 0
KDXCOLEV Flags = - - -
kdxcolok 0
kdxcoopc 0x80: opcode=0: iot flags=--- is converted=Y
kdxconco 2
kdxcosdc 0
kdxconro 4
kdxcofbo 44=0x2c
kdxcofeo 3974=0xf86
kdxcoavs 3930
kdxlespl 0
kdxlende 0
kdxlenxt 0=0x0
kdxleprv 0=0x0
kdxledsz 0
kdxlebksz 8032
row#0[4021] flag: ------, lock: 0, len=4011
col 0; len 4000; (4000):
9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c
9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c
...
9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c 9c
--//可以发现如果字符串长度等于4000,后面的0xff不存在.
col 1; len 6; (6): 01 00 02 24 00 03
row#1[4004] flag: ------, lock: 0, len=17
col 0; len 7; (7): 9d 9d 9d 9d 9d 9d ff
col 1; len 6; (6): 01 00 02 24 00 01
row#2[3987] flag: ------, lock: 0, len=17
col 0; len 7; (7): 9e 9e 9e 9e 9e 9e ff
col 1; len 6; (6): 01 00 02 24 00 00
row#3[3974] flag: ------, lock: 0, len=13
col 0; len 3; (3): fe fe ff
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
col 1; len 6; (6): 01 00 02 24 00 02
----- end of leaf block dump -----
End dump data blocks tsn: 4 file#: 4 minblk 555 maxblk 555
SCOTT@book> select dump(name,16) c30 from t where id <=3;
C30
------------------------------
Typ=1 Len=6: 61,61,61,61,61,61
Typ=1 Len=6: 62,62,62,62,62,62
Typ=1 Len=1: 0
--//奇怪的是chr(0),降序排序后变成了fe fe ff.长度变成了3.看来我以前想的过于简单了.
SCOTT@book> create index i_t_namex on t(name);
Index created.
SCOTT@book> select segment_name,header_file,header_block from dba_segments where owner=user and segment_name in ('I_T_NAMEX');
SEGMENT_NAME HEADER_FILE HEADER_BLOCK
-------------------- ----------- ------------
I_T_NAMEX 4 562
SCOTT@book> alter system dump datafile 4 block 563;
System altered.
Block header dump: 0x01000233
Object id on Block? Y
seg/obj: 0x1610e csc: 0x03.376e112c itc: 2 flg: E typ: 2 - INDEX
brn: 0 bdba: 0x1000230 ver: 0x01 opc: 0
inc: 0 exflg: 0
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0x0000.000.00000000 0x00000000.0000.00 ---- 0 fsc 0x0000.00000000
0x02 0xffff.000.00000000 0x00000000.0000.00 C--- 0 scn 0x0003.376e112c
Leaf block dump
===============
header address 140469718254180=0x7fc1a7a88264
kdxcolev 0
KDXCOLEV Flags = - - -
kdxcolok 0
kdxcoopc 0x80: opcode=0: iot flags=--- is converted=Y
kdxconco 2
kdxcosdc 0
kdxconro 4
kdxcofbo 44=0x2c
kdxcofeo 3978=0xf8a
kdxcoavs 3934
kdxlespl 0
kdxlende 0
kdxlenxt 0=0x0
kdxleprv 0=0x0
kdxledsz 0
kdxlebksz 8032
row#0[8021] flag: ------, lock: 0, len=11
col 0; len 1; (1): 00
col 1; len 6; (6): 01 00 02 24 00 02
row#1[8005] flag: ------, lock: 0, len=16
col 0; len 6; (6): 61 61 61 61 61 61
col 1; len 6; (6): 01 00 02 24 00 00
row#2[7989] flag: ------, lock: 0, len=16
col 0; len 6; (6): 62 62 62 62 62 62
col 1; len 6; (6): 01 00 02 24 00 01
row#3[3978] flag: ------, lock: 0, len=4011
col 0; len 4000; (4000):
63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63
...
63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63 63
col 1; len 6; (6): 01 00 02 24 00 03
----- end of leaf block dump -----
End dump data blocks tsn: 4 file#: 4 minblk 563 maxblk 563
3.重新测试:
create table tx (id number,name varchar2(10));
insert into tx values (0,chr(0));
insert into tx values (1,chr(1));
insert into tx values (2,chr(2));
insert into tx values (3,chr(97));
commit ;
SCOTT@book> create index if_tx_name on tx(name desc,name);
Index created.
SCOTT@book> select segment_name,header_file,header_block from dba_segments where owner=user and segment_name in ('IF_TX_NAME');
SEGMENT_NAME HEADER_FILE HEADER_BLOCK
-------------------- ----------- ------------
IF_TX_NAME 4 682
SCOTT@book> alter system flush buffer_cache;
System altered.
SCOTT@book> alter system dump datafile 4 block 683;
System altered.
--//检查转储:
row#0[8018] flag: ------, lock: 0, len=14
col 0; len 2; (2): 9e ff
col 1; len 1; (1): 61
col 2; len 6; (6): 01 00 02 a4 00 03
row#1[8004] flag: ------, lock: 0, len=14
col 0; len 2; (2): fd ff
col 1; len 1; (1): 02
col 2; len 6; (6): 01 00 02 a4 00 02
row#2[7989] flag: ------, lock: 0, len=15
col 0; len 3; (3): fe fa ff
col 1; len 1; (1): 01
col 2; len 6; (6): 01 00 02 a4 00 01
row#3[7974] flag: ------, lock: 0, len=15
col 0; len 3; (3): fe fe ff
col 1; len 1; (1): 00
col 2; len 6; (6): 01 00 02 a4 00 00
--//要表达chr(0)为什么是fe fe,真是再次考验自己的中文表达能力^_^.
1.首先1点为什么最后加上0xff,实际上为了排序的需要,假设2个字符串排序'a','aa',如果正常排序'a','aa'.
降序排序就是'aa'在前,'a'在后.如果没有0xff在最后,'aa'=>'0x9e9e','a'=>'0x9e',这样排序变成'a'在前,'aa'在后.
最后补上'0xff'后,'保证'a'在'aa'之后(指编码之后).aa'=>'0x9e9eff','a'=>'0x9eff',这样降序排序保证'aa'在前,'a'在后.因为
0xff是最大的ascii码. 这也是为什么降序排序最后要加上0xff.
2.这样问题就来了,假设排序字符串'a\0'(实际上就是'a'||chr(0)).这样如果按照前面的编码问题就来了.
'a\0'对应编码就是'0x9effff','a'=>'0x9eff'.这样降序排序有变成了'a'在前,'a\0'在后,违背降序排序的原则.
为了规避这个风险,oracle选择'0xfefe'表示chr(0)的降序.这样chr(1)就不能再是'0xfe',变成了'0xfefa',
至于为什么不是'0xfefd','0xfefc',我就不知道了.
--//另外实际上降序使用的函数是sys_op_descend,反向是SYS_OP_UNDESCEND,可以通过执行计划看出来.
SCOTT@book> select /*+ index(t) */ count(*) from tx where name=chr(0);
COUNT(*)
----------
1
SCOTT@book> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID 4t3j0dhf5qg2c, child number 0
-------------------------------------
select /*+ index(t) */ count(*) from tx where name=chr(0)
Plan hash value: 4016183490
---------------------------------------------------------------------------------
| Id | Operation | Name | E-Rows |E-Bytes| Cost (%CPU)| E-Time |
---------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 1 (100)| |
| 1 | SORT AGGREGATE | | 1 | 7 | | |
|* 2 | INDEX RANGE SCAN| IF_TX_NAME | 1 | 7 | 1 (0)| 00:00:01 |
---------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / TX@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("TX"."SYS_NC00003$"=HEXTORAW('FEFEFF') )
filter(SYS_OP_UNDESCEND("TX"."SYS_NC00003$")=')
--//执行计划实际上包括access以及filter.
SCOTT@book> select sys_op_descend(chr(rownum-1)) from dual connect by level<=8;
SYS_OP_D
--------
FEFEFF
FEFAFF
FDFF
FCFF
FBFF
FAFF
F9FF
F8FF
8 rows selected.
SCOTT@book> select SYS_OP_UNDESCEND('FEFEFF') from dual ;
SY
--
00
SCOTT@book> select SYS_OP_UNDESCEND('FEFDFF') from dual ;
SYS_
----
0000
SCOTT@book> select SYS_OP_UNDESCEND('FEFCFF') from dual ;
SYS_
----
0001
SCOTT@book> select SYS_OP_UNDESCEND('FEFAFF') from dual ;
SY
--
01
--//这样大家明白为什么chr(1)是'0xFEFA'吧,大家自己细细体会吧,不再写出来了,真心不好写.
总结:
--//真心佩服oracle的一些设计细节,真是博大精深.
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/267265/viewspace-2221425/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/267265/viewspace-2221425/