原文地址: http://www.cnblogs.com/killkill/archive/2010/09/06/1819536.html
Oracle使用正则表达式离不开这4个函数:
1。regexp_like
2。regexp_substr
3。regexp_instr
4。regexp_replace
看函数名称大概就能猜到有什么用了。
regexp_like 只能用于条件表达式,和 like 类似,但是使用的正则表达式进行匹配,语法很简单:
regexp_substr 函数,和 substr 类似,用于拾取合符正则表达式描述的字符子串,语法如下:
regexp_instr 函数,和 instr 类似,用于标定符合正则表达式的字符子串的开始位置,语法如下:
regexp_replace 函数,和 replace 类似,用于替换符合正则表达式的字符串,语法如下:
这里解析一下几个参数的含义:
1。source_char,输入的字符串,可以是列名或者字符串常量、变量。
2。pattern,正则表达式。
3。match_parameter,匹配选项。
取值范围: i:大小写不敏感; c:大小写敏感;n:点号 . 不匹配换行符号;m:多行模式;x:扩展模式,忽略正则表达式中的空白字符。
4。position,标识从第几个字符开始正则表达式匹配。
5。occurrence,标识第几个匹配组。
6。replace_string,替换的字符串。
说了一堆文绉绉的,现在开始实例演练了,在此之前先建好一个表。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
create
table
tmp
as
with
data
as
(
select
'like'
as
id ,
'a9999'
as
str
from
dual
union
all
select
'like'
,
'a9c'
from
dual
union
all
select
'like'
,
'A7007'
from
dual
union
all
select
'like'
,
'123a34cc'
from
dual
union
all
select
'substr'
,
'123,234,345'
from
dual
union
all
select
'substr'
,
'12,34.56:78'
from
dual
union
all
select
'substr'
,
'123456789'
from
dual
union
all
select
'instr'
,
'192.168.0.1'
from
dual
union
all
select
'replace'
,
'(020)12345678'
from
dual
union
all
select
'replace'
,
'001517729C28'
from
dual
)
select
*
from
data ;
select
*
from
tmp ;
ID STR
------- -------------
like
a9999
like
a9c
like
A7007
like
123a34cc
substr 123,234,345
substr 12,34.56:78
substr 123456789
instr 192.168.0.1
replace
(020)12345678
replace
001517729C28
|
regexp_like 例子:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
select
str
from
tmp
where
id=
'like'
and
regexp_like(str,
'A\d+'
,
'i'
);
-- 'i' 忽略大小写
STR
-------------
a9999
a9c
A7007
123a34cc
select
str
from
tmp
where
id=
'like'
and
regexp_like(str,
'a\d+'
);
STR
-------------
a9999
a9c
123a34cc
select
str
from
tmp
where
id=
'like'
and
regexp_like(str,
'^a\d+'
);
STR
-------------
a9999
a9c
select
str
from
tmp
where
id=
'like'
and
regexp_like(str,
'^a\d+$'
);
STR
-------------
a9999
|
regexp_substr 例子:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
col str format a15;
select
str,
regexp_substr(str,
'[^,]+'
) str,
regexp_substr(str,
'[^,]+'
,1,1) str,
regexp_substr(str,
'[^,]+'
,1,2) str,
-- occurrence 第几个匹配组
regexp_substr(str,
'[^,]+'
,2,1) str
-- position 从第几个字符开始匹配
from
tmp
where
id=
'substr'
;
STR STR STR STR STR
--------------- --------------- --------------- --------------- ---------------
123,234,345 123 123 234 23
12,34.56:78 12 12 34.56:78 2
123456789 123456789 123456789 23456789
select
str,
regexp_substr(str,
'\d'
) str,
regexp_substr(str,
'\d+'
,1,1) str,
regexp_substr(str,
'\d{2}'
,1,2) str,
regexp_substr(str,
'\d{3}'
,2,1) str
from
tmp
where
id=
'substr'
;
STR STR STR STR STR
--------------- --------------- --------------- --------------- ---------------
123,234,345 1 123 23 234
12,34.56:78 1 12 34
123456789 1 123456789 34 234
select
regexp_substr(
'123456789'
,
'\d'
,1,
level
) str
--取出每位数字,有时这也是行转列的方式
from
dual
connect
by
level
<=9
STR
---------------
1
2
3
4
5
6
7
8
9
|
regex_instr 例子:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
col ind format 9999;
select
str,
regexp_instr(str,
'\.'
) ind ,
regexp_instr(str,
'\.'
,1,2) ind ,
regexp_instr(str,
'\.'
,5,2) ind
from
tmp
where
id=
'instr'
;
STR IND IND IND
--------------- ----- ----- -----
192.168.0.1 4 8 10
select
regexp_instr(
'192.168.0.1'
,
'\.'
,1,
level
) ind ,
-- 点号. 所在的位置
regexp_instr(
'192.168.0.1'
,
'\d'
,1,
level
) ind
-- 每个数字的位置
from
dual
connect
by
level
<= 9
IND IND
----- -----
4 1
8 2
10 3
0 5
0 6
0 7
0 9
0 11
0 0
|
regex_replace 例子:
1
2
3
4
5
6
7
8
9
10
|
select
str,
regexp_replace(str,
'020'
,
'GZ'
) str,
regexp_replace(str,
'(\d{3})(\d{3})'
,
'<\2\1>'
) str
-- 将第一、第二捕获组交换位置,用尖括号标识出来
from
tmp
where
id=
'replace'
;
STR STR STR
--------------- --------------- ---------------
(020)12345678 (GZ)12345678 (020)<456123>78
001517729C28 001517729C28 <517001>729C28
|
综合应用的例子:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
col row_line format a30;
with
sudoku
as
(
select
'020000080568179234090000010030040050040205090070080040050000060289634175010000020'
as
line
from
dual
),
tmp
as
(
select
regexp_substr(line,
'\d{9}'
,1,
level
) row_line,
level
col
from
sudoku
connect
by
level
<=9
)
select
regexp_replace( row_line ,
'(\d)(\d)(\d)(\d)(\d)(\d)(\d)(\d)(\d)'
,
'\1 \2 \3 \4 \5 \6 \7 \8 \9'
) row_line
from
tmp
ROW_LINE
------------------------------
0 2 0 0 0 0 0 8 0
5 6 8 1 7 9 2 3 4
0 9 0 0 0 0 0 1 0
0 3 0 0 4 0 0 5 0
0 4 0 2 0 5 0 9 0
0 7 0 0 8 0 0 4 0
0 5 0 0 0 0 0 6 0
2 8 9 6 3 4 1 7 5
0 1 0 0 0 0 0 2 0
|