Oracle存储过程例子:运用了正则表达式、数组等
CREATE
OR
REPLACE
PROCEDURE
"SP_GET_ARTICLE_DATA"
(article_detail_id_$ number ,
Record_Md5_ID_$ varchar2 ,
content_text_$ clob,
ReturnValue out number
) is
begin
declare
-- 自定义数组
TYPE StringArray IS table OF VARCHAR2 ( 1000 );
Offset_URL number ;
Offset_Email number ;
Offset_Phone number ;
Offset_Account number ;
-- 数据下标
URL_V_Num number : = 1 ;
Email_V_Num number : = 1 ;
Phone_V_Num number : = 1 ;
Account_V_Num number : = 1 ;
-- 数据元素存在标志
Is_Exsit_Flag number ;
URL varchar2 ( 3000 );
Email varchar2 ( 200 );
Phone varchar2 ( 100 );
Bank_Account varchar2 ( 100 );
URL_Array StringArray: = StringArray();
Email_Array StringArray: = StringArray();
Phone_Array StringArray: = StringArray();
Account_Array StringArray: = StringArray();
-- 常量
URL_RegExp constant varchar2 ( 300 ): = ' (((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)|(www\.))+(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(/[a-zA-Z0-9\& ' || ' %_\./-~-]*)? ' ;
Email_RegExp constant varchar2 ( 200 ): = ' [A-Za-z0-9_]+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)* ' ;
Phone_RegExp constant varchar2 ( 300 ): = ' [+[:digit:]]{1,2}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]*\d{4}|[+[:digit:]]?[-\.,[:blank:]]*\d{1,2}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]+\d{4}|\d{3}[-\.,[:blank:]]+\d{4} ' ;
Bank_Account_RegExp constant varchar2 ( 200 ): = ' (SA)?[[:digit:]]{22}|(SA)?[[:blank:]]*([[:digit:]]{4}|\d{2})[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*(\d{4}|\d{2}) ' ;
URL_Escape constant varchar2 ( 100 ): = ' \.jpg|\.rar|\.zip|\.png|\.gif|\.pdf|\.doc|\.excel|\.swf|\.mpg|\.\.|\.rmvb ' ;
DUP_VAL_INDEX exception;
pragma exception_init(DUP_VAL_INDEX, - 0001 );
begin
if article_detail_id_$ is not null and article_detail_id_$ <> 0 then
Offset_URL: = 1 ;
Offset_Email: = 1 ;
Offset_Phone: = 1 ;
Offset_Account: = 1 ;
while Offset_URL <> 0 or Offset_Email <> 0 or Offset_Phone <> 0 or Offset_Account <> 0
loop
-- 获取URL
if Offset_URL <> 0 then
select trim(to_char(REGEXP_SUBSTR(content_text_$,URL_RegExp, Offset_URL, 1 , ' i ' )))
into URL from dual;
-- 获取从offset位置开始的第一个符合URL表达式的字符串位置,并赋给offset
select REGEXP_INSTR(content_text_$, URL_RegExp, Offset_URL, 1 , 1 , ' i ' )
into Offset_URL from dual;
end if ;
-- 获取Email
if Offset_Email <> 0 then
select trim(to_char(REGEXP_SUBSTR(content_text_$,Email_RegExp, Offset_Email, 1 , ' i ' )))
into Email from dual;
-- 获取从offset位置开始的第一个符合Email表达式的字符串位置,并赋给offset
select REGEXP_INSTR(content_text_$, Email_RegExp, Offset_Email, 1 , 1 , ' i ' )
into Offset_Email from dual;
end if ;
-- 获取Phone
if Offset_Phone <> 0 then
select trim(to_char(REGEXP_SUBSTR(content_text_$,Phone_RegExp, Offset_Phone, 1 , ' i ' )))
into Phone from dual;
-- 获取从offset位置开始的第一个符合Phone表达式的字符串位置,并赋给offset
select REGEXP_INSTR(content_text_$, Phone_RegExp, Offset_Phone, 1 , 1 , ' i ' )
into Offset_Phone from dual;
end if ;
-- 获取Bank_Account
if Offset_Account <> 0 then
select trim(to_char(REGEXP_SUBSTR(content_text_$,Bank_Account_RegExp, Offset_Account, 1 , ' i ' )))
into Bank_Account from dual;
-- 获取从offset位置开始的第一个符合Phone表达式的字符串位置,并赋给offset
select REGEXP_INSTR(content_text_$, Bank_Account_RegExp, Offset_Account, 1 , 1 , ' i ' )
into Offset_Account from dual;
end if ;
-- URL插入到数组中
if (length(URL) > 0 and length(URL) < 800 ) and REGEXP_INSTR(URL, URL_Escape, 1 , 1 , 0 , ' i ' ) = 0 then
Is_Exsit_Flag : = 0 ;
FOR I IN 1 ..URL_Array. COUNT LOOP
if URL = URL_Array(I) then
Is_Exsit_Flag: = 1 ;
end if ;
END LOOP;
-- 如果数组中不存在当前值,则插入到数组
if Is_Exsit_Flag = 0 then
URL_Array.EXTEND( 1 );
URL_Array(URL_V_Num): = URL;
URL_V_Num: = URL_V_Num + 1 ;
end if ;
end if ;
-- Email插入到数组中
if (length(Email) > 0 and length(Email) < 100 ) then
Is_Exsit_Flag : = 0 ;
FOR I IN 1 ..Email_Array. COUNT LOOP
if Email = Email_Array(I) then
Is_Exsit_Flag: = 1 ;
end if ;
END LOOP;
-- 如果数组中不存在当前值,则插入到数组
if Is_Exsit_Flag = 0 then
Email_Array.EXTEND( 1 );
Email_Array(Email_V_Num): = Email;
Email_V_Num: = Email_V_Num + 1 ;
end if ;
end if ;
-- Phone插入到数组中
if (length(Phone) > 6 and length(Phone) < 100 ) then
Is_Exsit_Flag : = 0 ;
FOR I IN 1 ..Phone_Array. COUNT LOOP
if Phone = Phone_Array(I) then
Is_Exsit_Flag: = 1 ;
end if ;
END LOOP;
-- 如果数组中不存在当前值,则插入到数组
if Is_Exsit_Flag = 0 then
Phone_Array.EXTEND( 1 );
Phone_Array(Phone_V_Num): = Phone;
Phone_V_Num: = Phone_V_Num + 1 ;
end if ;
end if ;
-- Bank_Account插入到数组中
if (length(Bank_Account) > 0 and length(Bank_Account) < 100 ) then
Is_Exsit_Flag : = 0 ;
FOR I IN 1 ..Account_Array. COUNT LOOP
if Bank_Account = Account_Array(I) then
Is_Exsit_Flag: = 1 ;
end if ;
END LOOP;
-- 如果数组中不存在当前值,则插入到数组
if Is_Exsit_Flag = 0 then
Account_Array.EXTEND( 1 );
Account_Array(Account_V_Num): = Bank_Account;
Account_V_Num: = Account_V_Num + 1 ;
end if ;
end if ;
end loop;
-- Dbms_Output.put_line('Array is:'||URL_Array(1));
-- 插入获取到的URL
FOR I IN 1 ..URL_Array. COUNT LOOP
insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values (article_detail_id_$,Record_Md5_ID_$, ' URL ' , ' 1 ' ,URL_Array(I));
END LOOP;
-- 插入获取到的Email
FOR I IN 1 ..Email_Array. COUNT LOOP
insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values (article_detail_id_$,Record_Md5_ID_$, ' Email ' , ' 0 ' ,Email_Array(I));
END LOOP;
-- 插入获取到的Phone
FOR I IN 1 ..Phone_Array. COUNT LOOP
insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values (article_detail_id_$,Record_Md5_ID_$, ' Phone ' , ' 0 ' ,Phone_Array(I));
END LOOP;
-- 插入获取到的Bank_Account
FOR I IN 1 ..Account_Array. COUNT LOOP
insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values (article_detail_id_$,Record_Md5_ID_$, ' Bank Account ' , ' 0 ' ,Account_Array(I));
END LOOP;
end if ;
Exception
when NO_DATA_FOUND then
ReturnValue: =- 1000 ;
when DUP_VAL_INDEX then
ReturnValue: =- 1001 ;
when VALUE_ERROR then
ReturnValue: =- 1002 ;
when TOO_MANY_ROWS then
ReturnValue: =- 1003 ;
when OTHERS then
ReturnValue: =- 1004 ;
-- Dbms_Output.put_line('The SQLCode is: '||SQLCODE);
-- Dbms_Output.put_line('The SQLERRM is: '||SQLERRM);
end ;
end SP_GET_ARTICLE_DATA;
(article_detail_id_$ number ,
Record_Md5_ID_$ varchar2 ,
content_text_$ clob,
ReturnValue out number
) is
begin
declare
-- 自定义数组
TYPE StringArray IS table OF VARCHAR2 ( 1000 );
Offset_URL number ;
Offset_Email number ;
Offset_Phone number ;
Offset_Account number ;
-- 数据下标
URL_V_Num number : = 1 ;
Email_V_Num number : = 1 ;
Phone_V_Num number : = 1 ;
Account_V_Num number : = 1 ;
-- 数据元素存在标志
Is_Exsit_Flag number ;
URL varchar2 ( 3000 );
Email varchar2 ( 200 );
Phone varchar2 ( 100 );
Bank_Account varchar2 ( 100 );
URL_Array StringArray: = StringArray();
Email_Array StringArray: = StringArray();
Phone_Array StringArray: = StringArray();
Account_Array StringArray: = StringArray();
-- 常量
URL_RegExp constant varchar2 ( 300 ): = ' (((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)|(www\.))+(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(/[a-zA-Z0-9\& ' || ' %_\./-~-]*)? ' ;
Email_RegExp constant varchar2 ( 200 ): = ' [A-Za-z0-9_]+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)* ' ;
Phone_RegExp constant varchar2 ( 300 ): = ' [+[:digit:]]{1,2}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]*\d{4}|[+[:digit:]]?[-\.,[:blank:]]*\d{1,2}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]+\d{4}|\d{3}[-\.,[:blank:]]+\d{4} ' ;
Bank_Account_RegExp constant varchar2 ( 200 ): = ' (SA)?[[:digit:]]{22}|(SA)?[[:blank:]]*([[:digit:]]{4}|\d{2})[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*(\d{4}|\d{2}) ' ;
URL_Escape constant varchar2 ( 100 ): = ' \.jpg|\.rar|\.zip|\.png|\.gif|\.pdf|\.doc|\.excel|\.swf|\.mpg|\.\.|\.rmvb ' ;
DUP_VAL_INDEX exception;
pragma exception_init(DUP_VAL_INDEX, - 0001 );
begin
if article_detail_id_$ is not null and article_detail_id_$ <> 0 then
Offset_URL: = 1 ;
Offset_Email: = 1 ;
Offset_Phone: = 1 ;
Offset_Account: = 1 ;
while Offset_URL <> 0 or Offset_Email <> 0 or Offset_Phone <> 0 or Offset_Account <> 0
loop
-- 获取URL
if Offset_URL <> 0 then
select trim(to_char(REGEXP_SUBSTR(content_text_$,URL_RegExp, Offset_URL, 1 , ' i ' )))
into URL from dual;
-- 获取从offset位置开始的第一个符合URL表达式的字符串位置,并赋给offset
select REGEXP_INSTR(content_text_$, URL_RegExp, Offset_URL, 1 , 1 , ' i ' )
into Offset_URL from dual;
end if ;
-- 获取Email
if Offset_Email <> 0 then
select trim(to_char(REGEXP_SUBSTR(content_text_$,Email_RegExp, Offset_Email, 1 , ' i ' )))
into Email from dual;
-- 获取从offset位置开始的第一个符合Email表达式的字符串位置,并赋给offset
select REGEXP_INSTR(content_text_$, Email_RegExp, Offset_Email, 1 , 1 , ' i ' )
into Offset_Email from dual;
end if ;
-- 获取Phone
if Offset_Phone <> 0 then
select trim(to_char(REGEXP_SUBSTR(content_text_$,Phone_RegExp, Offset_Phone, 1 , ' i ' )))
into Phone from dual;
-- 获取从offset位置开始的第一个符合Phone表达式的字符串位置,并赋给offset
select REGEXP_INSTR(content_text_$, Phone_RegExp, Offset_Phone, 1 , 1 , ' i ' )
into Offset_Phone from dual;
end if ;
-- 获取Bank_Account
if Offset_Account <> 0 then
select trim(to_char(REGEXP_SUBSTR(content_text_$,Bank_Account_RegExp, Offset_Account, 1 , ' i ' )))
into Bank_Account from dual;
-- 获取从offset位置开始的第一个符合Phone表达式的字符串位置,并赋给offset
select REGEXP_INSTR(content_text_$, Bank_Account_RegExp, Offset_Account, 1 , 1 , ' i ' )
into Offset_Account from dual;
end if ;
-- URL插入到数组中
if (length(URL) > 0 and length(URL) < 800 ) and REGEXP_INSTR(URL, URL_Escape, 1 , 1 , 0 , ' i ' ) = 0 then
Is_Exsit_Flag : = 0 ;
FOR I IN 1 ..URL_Array. COUNT LOOP
if URL = URL_Array(I) then
Is_Exsit_Flag: = 1 ;
end if ;
END LOOP;
-- 如果数组中不存在当前值,则插入到数组
if Is_Exsit_Flag = 0 then
URL_Array.EXTEND( 1 );
URL_Array(URL_V_Num): = URL;
URL_V_Num: = URL_V_Num + 1 ;
end if ;
end if ;
-- Email插入到数组中
if (length(Email) > 0 and length(Email) < 100 ) then
Is_Exsit_Flag : = 0 ;
FOR I IN 1 ..Email_Array. COUNT LOOP
if Email = Email_Array(I) then
Is_Exsit_Flag: = 1 ;
end if ;
END LOOP;
-- 如果数组中不存在当前值,则插入到数组
if Is_Exsit_Flag = 0 then
Email_Array.EXTEND( 1 );
Email_Array(Email_V_Num): = Email;
Email_V_Num: = Email_V_Num + 1 ;
end if ;
end if ;
-- Phone插入到数组中
if (length(Phone) > 6 and length(Phone) < 100 ) then
Is_Exsit_Flag : = 0 ;
FOR I IN 1 ..Phone_Array. COUNT LOOP
if Phone = Phone_Array(I) then
Is_Exsit_Flag: = 1 ;
end if ;
END LOOP;
-- 如果数组中不存在当前值,则插入到数组
if Is_Exsit_Flag = 0 then
Phone_Array.EXTEND( 1 );
Phone_Array(Phone_V_Num): = Phone;
Phone_V_Num: = Phone_V_Num + 1 ;
end if ;
end if ;
-- Bank_Account插入到数组中
if (length(Bank_Account) > 0 and length(Bank_Account) < 100 ) then
Is_Exsit_Flag : = 0 ;
FOR I IN 1 ..Account_Array. COUNT LOOP
if Bank_Account = Account_Array(I) then
Is_Exsit_Flag: = 1 ;
end if ;
END LOOP;
-- 如果数组中不存在当前值,则插入到数组
if Is_Exsit_Flag = 0 then
Account_Array.EXTEND( 1 );
Account_Array(Account_V_Num): = Bank_Account;
Account_V_Num: = Account_V_Num + 1 ;
end if ;
end if ;
end loop;
-- Dbms_Output.put_line('Array is:'||URL_Array(1));
-- 插入获取到的URL
FOR I IN 1 ..URL_Array. COUNT LOOP
insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values (article_detail_id_$,Record_Md5_ID_$, ' URL ' , ' 1 ' ,URL_Array(I));
END LOOP;
-- 插入获取到的Email
FOR I IN 1 ..Email_Array. COUNT LOOP
insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values (article_detail_id_$,Record_Md5_ID_$, ' Email ' , ' 0 ' ,Email_Array(I));
END LOOP;
-- 插入获取到的Phone
FOR I IN 1 ..Phone_Array. COUNT LOOP
insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values (article_detail_id_$,Record_Md5_ID_$, ' Phone ' , ' 0 ' ,Phone_Array(I));
END LOOP;
-- 插入获取到的Bank_Account
FOR I IN 1 ..Account_Array. COUNT LOOP
insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values (article_detail_id_$,Record_Md5_ID_$, ' Bank Account ' , ' 0 ' ,Account_Array(I));
END LOOP;
end if ;
Exception
when NO_DATA_FOUND then
ReturnValue: =- 1000 ;
when DUP_VAL_INDEX then
ReturnValue: =- 1001 ;
when VALUE_ERROR then
ReturnValue: =- 1002 ;
when TOO_MANY_ROWS then
ReturnValue: =- 1003 ;
when OTHERS then
ReturnValue: =- 1004 ;
-- Dbms_Output.put_line('The SQLCode is: '||SQLCODE);
-- Dbms_Output.put_line('The SQLERRM is: '||SQLERRM);
end ;
end SP_GET_ARTICLE_DATA;