oracle模糊查询:分区局部全文索引方式(四)

DATABASE:dwtest(210开发库)

--按根级行业ID和大洲进行混合分区
DROP TABLE iquery.lpx_kw_tmp02_bak2;
CREATE TABLE iquery.lpx_kw_tmp02_bak2(
 KEYWORDS                                           VARCHAR2(256)
 ,REGIN_NAME                                         VARCHAR2(128)
 ,CATEGORY_ROOT_ID                                   NUMBER
 ,SRH_PV_THIS_5MON                                   NUMBER
)PARTITION BY RANGE(CATEGORY_ROOT_ID)
(    PARTITION p1 VALUES LESS THAN(1) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p2 VALUES LESS THAN(2) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p3 VALUES LESS THAN(3) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p4 VALUES LESS THAN(4) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p5 VALUES LESS THAN(5) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p6 VALUES LESS THAN(6) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p7 VALUES LESS THAN(7) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p8 VALUES LESS THAN(8) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p9 VALUES LESS THAN(9) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p10 VALUES LESS THAN(10) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p11 VALUES LESS THAN(11) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p12 VALUES LESS THAN(12) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p13 VALUES LESS THAN(13) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p14 VALUES LESS THAN(14) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p15 VALUES LESS THAN(15) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p16 VALUES LESS THAN(16) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p17 VALUES LESS THAN(17) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p18 VALUES LESS THAN(18) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p19 VALUES LESS THAN(19) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p20 VALUES LESS THAN(20) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p21 VALUES LESS THAN(21) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p22 VALUES LESS THAN(23) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p23 VALUES LESS THAN(26) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p24 VALUES LESS THAN(28) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p25 VALUES LESS THAN(30) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p26 VALUES LESS THAN(32) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p27 VALUES LESS THAN(34) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p28 VALUES LESS THAN(36) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p29 VALUES LESS THAN(39) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p30 VALUES LESS THAN(41) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p31 VALUES LESS THAN(42) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p32 VALUES LESS THAN(43) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p33 VALUES LESS THAN(44) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p34 VALUES LESS THAN(66) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p35 VALUES LESS THAN(80) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p36 VALUES LESS THAN(322) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p37 VALUES LESS THAN(339) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p38 VALUES LESS THAN(502) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p39 VALUES LESS THAN(509) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p40 VALUES LESS THAN(1500) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p41 VALUES LESS THAN(2000) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p42 VALUES LESS THAN(3000) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p43 VALUES LESS THAN(999999999) TABLESPACE TBS_EN_DSS_2009
);


INSERT INTO iquery.lpx_kw_tmp02_bak2
SELECT keywords,
       REGIN_NAME,
       CATEGORY_ROOT_ID,
       SUM(SRH_PV_THIS_5MON) AS SRH_PV_THIS_5MON
  FROM iquery.lpx_kw_tmp02
 WHERE CATEGORY_ROOT_ID IS NOT NULL  --去除类目为空的记录
 GROUP BY keywords,
       REGION_ID,
       REGIN_NAME,
       CATEGORY_ROOT_ID;
COMMIT;

7799710 rows created.

SQL>
Commit complete.

SQL> select count(1) from iquery.lpx_kw_tmp02_bak2;


  COUNT(1)
----------
   7799710



--造测试数据
DROP TABLE iquery.lpx_kw_test;
CREATE TABLE iquery.lpx_kw_test TABLESPACE TBS_EN_DSS_2009
AS
SELECT *
  FROM (
        SELECT SUBSTR(keywords, INSTR(keywords, ' ', 1)+1, INSTR(keywords, ' ', 1, 2) - INSTR(keywords, ' ', 1) - 1) AS kw,
               a.*,
               row_number() OVER(PARTITION BY category_root_id ORDER BY dbms_random.value DESC) AS rw
          FROM iquery.lpx_kw_tmp02_bak2  a
         WHERE length(keywords) - length(REPLACE(keywords, ' ', '')) > 2
       )
 WHERE rw < 100 + 1
   AND length(kw) > 5
 ;
 
 SQL> select count(1) from iquery.lpx_kw_test;


  COUNT(1)
----------
      1577

 

--根据行业和大洲进行匹配
DROP TABLE iquery.lpx_kw_tmp03;
CREATE TABLE iquery.lpx_kw_tmp03(
 KW                                                 VARCHAR2(256)
 ,REGIN_NAME                                         VARCHAR2(128)
 ,CATEGORY_ROOT_ID                                   NUMBER
 ,SRH_PV_THIS_5MON                                   NUMBER
);

set serveroutput ON;

DECLARE 
  cnt               NUMBER;
  start_date        DATE;
  end_date          DATE;
 
CURSOR c1
IS
 select kw, keywords, regin_name, category_root_id FROM iquery.lpx_kw_test;
 
BEGIN

 cnt := 0;
 start_date := SYSDATE;
 
 FOR ref_cursor IN c1
 LOOP 
 INSERT INTO iquery.lpx_kw_tmp03
 SELECT ref_cursor.kw
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id
        ,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
   FROM iquery.lpx_kw_tmp02_bak2 a
  WHERE a.keywords LIKE '%'||ref_cursor.kw||'%'
    AND a.category_root_id = ref_cursor.category_root_id   --根级行业
    AND a.regin_name = ref_cursor.regin_name               --买家所在大洲
  GROUP BY ref_cursor.kw
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id;
 COMMIT;
 cnt := cnt + 1;
 
 END LOOP;
 end_date := SYSDATE;
 
 DBMS_OUTPUT.PUT_LINE('total records:'||cnt);
 DBMS_OUTPUT.PUT_LINE('total time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000));
 DBMS_OUTPUT.PUT_LINE('avg time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000)/cnt);
 
END;
/



--无全文索引的性能
total records:1577
total time(ms):35000
avg time(ms):22.19403931515535827520608750792644261256

 

 

 

 

--创建全文索引
grant ctxapp to etl;
exec ctx_ddl.create_preference('lpxuan_kw','BASIC_LEXER');
--exec ctx_ddl.drop_preference('lpxuan_kw');
CREATE INDEX idx_kw_keyw on iquery.lpx_kw_tmp02_bak2(keywords) INDEXTYPE IS CTXSYS.CONTEXT parameters('lexer lpxuan_kw')
LOCAL (partition p1, partition p2, partition p3, partition p4, partition p5,partition p6, partition p7, partition p8, partition p9, partition p10,
       partition p11, partition p12, partition p13, partition p14, partition p15,partition p16, partition p17, partition p18, partition p19, partition p20,
       partition p21, partition p22, partition p23, partition p24, partition p25,partition p26, partition p27, partition p28, partition p29, partition p30,
       partition p31, partition p32, partition p33, partition p34, partition p35,partition p36, partition p37, partition p38, partition p39, partition p40,
       partition p41, partition p42, partition p43);

--eg:
/*
create index html2_idx on HTML2(newsdescription) indextype is ctxsys.context local
(partition indx parameters('lexer my_lexer'),partition indx01 tablespace
users01  parameters('lexer my_lexer'),partition indx02 tablespace users02 param
eters('lexer my_lexer'),partition indx03 tablespace users03  parameters('lexer m
y_lexer') );
*/


CREATE or replace procedure rel_kw_sync_index as
      begin
      ctx_ddl.sync_index('idx_kw_keyw');
      end;
      /

Procedure created.

Elapsed: 00:00:00.08

VARIABLE jobno number;
BEGIN
  DBMS_JOB.SUBMIT(:jobno,'rel_kw_sync_index();',
  SYSDATE, 'SYSDATE + (1/24/4)');
  commit;
  END;
  /


--根据行业和大洲进行匹配
DROP TABLE iquery.lpx_kw_tmp03;
CREATE TABLE iquery.lpx_kw_tmp03(
 KW                                                 VARCHAR2(256)
 ,REGIN_NAME                                         VARCHAR2(128)
 ,CATEGORY_ROOT_ID                                   NUMBER
 ,SRH_PV_THIS_5MON                                   NUMBER
);

set serveroutput ON;

DECLARE 
  cnt               NUMBER;
  start_date        DATE;
  end_date          DATE;
 
CURSOR c1
IS
 select REPLACE(REPLACE(kw, '-', '/-'), '&', '/&') AS kw, kw AS kw_bak, keywords, regin_name, category_root_id FROM iquery.lpx_kw_test;
 
BEGIN

 cnt := 0;
 start_date := SYSDATE;
 
 FOR ref_cursor IN c1
 LOOP 
 INSERT INTO iquery.lpx_kw_tmp03
 SELECT ref_cursor.kw_bak
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id
        ,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
   FROM iquery.lpx_kw_tmp02_bak2 a
  WHERE contains(a.keywords,ref_cursor.kw) > 0             --走全文索引
    AND a.category_root_id = ref_cursor.category_root_id   --根级行业
    AND a.regin_name = ref_cursor.regin_name               --买家所在大洲
  GROUP BY ref_cursor.kw_bak
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id;
 COMMIT;
 cnt := cnt + 1;
 
 END LOOP;
 end_date := SYSDATE;
 
 DBMS_OUTPUT.PUT_LINE('total records:'||cnt);
 DBMS_OUTPUT.PUT_LINE('total time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000));
 DBMS_OUTPUT.PUT_LINE('avg time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000)/cnt);
 
END;
/


--有全文索引的性能
total records:1577
total time(ms):9000
avg time(ms):5.70703868103994927076727964489537095751

PL/SQL procedure successfully completed.

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值