DATABASE:dwtest(210开发库)
--按根级行业ID和大洲进行混合分区
DROP TABLE iquery.lpx_kw_tmp02_bak2;
CREATE TABLE iquery.lpx_kw_tmp02_bak2(
KEYWORDS VARCHAR2(256)
,REGIN_NAME VARCHAR2(128)
,CATEGORY_ROOT_ID NUMBER
,SRH_PV_THIS_5MON NUMBER
)PARTITION BY RANGE(CATEGORY_ROOT_ID)
( PARTITION p1 VALUES LESS THAN(1) TABLESPACE TBS_EN_DSS_2009,
PARTITION p2 VALUES LESS THAN(2) TABLESPACE TBS_EN_DSS_2009,
PARTITION p3 VALUES LESS THAN(3) TABLESPACE TBS_EN_DSS_2009,
PARTITION p4 VALUES LESS THAN(4) TABLESPACE TBS_EN_DSS_2009,
PARTITION p5 VALUES LESS THAN(5) TABLESPACE TBS_EN_DSS_2009,
PARTITION p6 VALUES LESS THAN(6) TABLESPACE TBS_EN_DSS_2009,
PARTITION p7 VALUES LESS THAN(7) TABLESPACE TBS_EN_DSS_2009,
PARTITION p8 VALUES LESS THAN(8) TABLESPACE TBS_EN_DSS_2009,
PARTITION p9 VALUES LESS THAN(9) TABLESPACE TBS_EN_DSS_2009,
PARTITION p10 VALUES LESS THAN(10) TABLESPACE TBS_EN_DSS_2009,
PARTITION p11 VALUES LESS THAN(11) TABLESPACE TBS_EN_DSS_2009,
PARTITION p12 VALUES LESS THAN(12) TABLESPACE TBS_EN_DSS_2009,
PARTITION p13 VALUES LESS THAN(13) TABLESPACE TBS_EN_DSS_2009,
PARTITION p14 VALUES LESS THAN(14) TABLESPACE TBS_EN_DSS_2009,
PARTITION p15 VALUES LESS THAN(15) TABLESPACE TBS_EN_DSS_2009,
PARTITION p16 VALUES LESS THAN(16) TABLESPACE TBS_EN_DSS_2009,
PARTITION p17 VALUES LESS THAN(17) TABLESPACE TBS_EN_DSS_2009,
PARTITION p18 VALUES LESS THAN(18) TABLESPACE TBS_EN_DSS_2009,
PARTITION p19 VALUES LESS THAN(19) TABLESPACE TBS_EN_DSS_2009,
PARTITION p20 VALUES LESS THAN(20) TABLESPACE TBS_EN_DSS_2009,
PARTITION p21 VALUES LESS THAN(21) TABLESPACE TBS_EN_DSS_2009,
PARTITION p22 VALUES LESS THAN(23) TABLESPACE TBS_EN_DSS_2009,
PARTITION p23 VALUES LESS THAN(26) TABLESPACE TBS_EN_DSS_2009,
PARTITION p24 VALUES LESS THAN(28) TABLESPACE TBS_EN_DSS_2009,
PARTITION p25 VALUES LESS THAN(30) TABLESPACE TBS_EN_DSS_2009,
PARTITION p26 VALUES LESS THAN(32) TABLESPACE TBS_EN_DSS_2009,
PARTITION p27 VALUES LESS THAN(34) TABLESPACE TBS_EN_DSS_2009,
PARTITION p28 VALUES LESS THAN(36) TABLESPACE TBS_EN_DSS_2009,
PARTITION p29 VALUES LESS THAN(39) TABLESPACE TBS_EN_DSS_2009,
PARTITION p30 VALUES LESS THAN(41) TABLESPACE TBS_EN_DSS_2009,
PARTITION p31 VALUES LESS THAN(42) TABLESPACE TBS_EN_DSS_2009,
PARTITION p32 VALUES LESS THAN(43) TABLESPACE TBS_EN_DSS_2009,
PARTITION p33 VALUES LESS THAN(44) TABLESPACE TBS_EN_DSS_2009,
PARTITION p34 VALUES LESS THAN(66) TABLESPACE TBS_EN_DSS_2009,
PARTITION p35 VALUES LESS THAN(80) TABLESPACE TBS_EN_DSS_2009,
PARTITION p36 VALUES LESS THAN(322) TABLESPACE TBS_EN_DSS_2009,
PARTITION p37 VALUES LESS THAN(339) TABLESPACE TBS_EN_DSS_2009,
PARTITION p38 VALUES LESS THAN(502) TABLESPACE TBS_EN_DSS_2009,
PARTITION p39 VALUES LESS THAN(509) TABLESPACE TBS_EN_DSS_2009,
PARTITION p40 VALUES LESS THAN(1500) TABLESPACE TBS_EN_DSS_2009,
PARTITION p41 VALUES LESS THAN(2000) TABLESPACE TBS_EN_DSS_2009,
PARTITION p42 VALUES LESS THAN(3000) TABLESPACE TBS_EN_DSS_2009,
PARTITION p43 VALUES LESS THAN(999999999) TABLESPACE TBS_EN_DSS_2009
);
INSERT INTO iquery.lpx_kw_tmp02_bak2
SELECT keywords,
REGIN_NAME,
CATEGORY_ROOT_ID,
SUM(SRH_PV_THIS_5MON) AS SRH_PV_THIS_5MON
FROM iquery.lpx_kw_tmp02
WHERE CATEGORY_ROOT_ID IS NOT NULL --去除类目为空的记录
GROUP BY keywords,
REGION_ID,
REGIN_NAME,
CATEGORY_ROOT_ID;
COMMIT;
7799710 rows created.
SQL>
Commit complete.
SQL> select count(1) from iquery.lpx_kw_tmp02_bak2;
COUNT(1)
----------
7799710
--造测试数据
DROP TABLE iquery.lpx_kw_test;
CREATE TABLE iquery.lpx_kw_test TABLESPACE TBS_EN_DSS_2009
AS
SELECT *
FROM (
SELECT SUBSTR(keywords, INSTR(keywords, ' ', 1)+1, INSTR(keywords, ' ', 1, 2) - INSTR(keywords, ' ', 1) - 1) AS kw,
a.*,
row_number() OVER(PARTITION BY category_root_id ORDER BY dbms_random.value DESC) AS rw
FROM iquery.lpx_kw_tmp02_bak2 a
WHERE length(keywords) - length(REPLACE(keywords, ' ', '')) > 2
)
WHERE rw < 100 + 1
AND length(kw) > 5
;
SQL> select count(1) from iquery.lpx_kw_test;
COUNT(1)
----------
1577
--根据行业和大洲进行匹配
DROP TABLE iquery.lpx_kw_tmp03;
CREATE TABLE iquery.lpx_kw_tmp03(
KW VARCHAR2(256)
,REGIN_NAME VARCHAR2(128)
,CATEGORY_ROOT_ID NUMBER
,SRH_PV_THIS_5MON NUMBER
);
set serveroutput ON;
DECLARE
cnt NUMBER;
start_date DATE;
end_date DATE;
CURSOR c1
IS
select kw, keywords, regin_name, category_root_id FROM iquery.lpx_kw_test;
BEGIN
cnt := 0;
start_date := SYSDATE;
FOR ref_cursor IN c1
LOOP
INSERT INTO iquery.lpx_kw_tmp03
SELECT ref_cursor.kw
,ref_cursor.regin_name
,ref_cursor.category_root_id
,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
FROM iquery.lpx_kw_tmp02_bak2 a
WHERE a.keywords LIKE '%'||ref_cursor.kw||'%'
AND a.category_root_id = ref_cursor.category_root_id --根级行业
AND a.regin_name = ref_cursor.regin_name --买家所在大洲
GROUP BY ref_cursor.kw
,ref_cursor.regin_name
,ref_cursor.category_root_id;
COMMIT;
cnt := cnt + 1;
END LOOP;
end_date := SYSDATE;
DBMS_OUTPUT.PUT_LINE('total records:'||cnt);
DBMS_OUTPUT.PUT_LINE('total time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000));
DBMS_OUTPUT.PUT_LINE('avg time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000)/cnt);
END;
/
--无全文索引的性能
total records:1577
total time(ms):35000
avg time(ms):22.19403931515535827520608750792644261256
--创建全文索引
grant ctxapp to etl;
exec ctx_ddl.create_preference('lpxuan_kw','BASIC_LEXER');
--exec ctx_ddl.drop_preference('lpxuan_kw');
CREATE INDEX idx_kw_keyw on iquery.lpx_kw_tmp02_bak2(keywords) INDEXTYPE IS CTXSYS.CONTEXT parameters('lexer lpxuan_kw')
LOCAL (partition p1, partition p2, partition p3, partition p4, partition p5,partition p6, partition p7, partition p8, partition p9, partition p10,
partition p11, partition p12, partition p13, partition p14, partition p15,partition p16, partition p17, partition p18, partition p19, partition p20,
partition p21, partition p22, partition p23, partition p24, partition p25,partition p26, partition p27, partition p28, partition p29, partition p30,
partition p31, partition p32, partition p33, partition p34, partition p35,partition p36, partition p37, partition p38, partition p39, partition p40,
partition p41, partition p42, partition p43);
--eg:
/*
create index html2_idx on HTML2(newsdescription) indextype is ctxsys.context local
(partition indx parameters('lexer my_lexer'),partition indx01 tablespace
users01 parameters('lexer my_lexer'),partition indx02 tablespace users02 param
eters('lexer my_lexer'),partition indx03 tablespace users03 parameters('lexer m
y_lexer') );
*/
CREATE or replace procedure rel_kw_sync_index as
begin
ctx_ddl.sync_index('idx_kw_keyw');
end;
/
Procedure created.
Elapsed: 00:00:00.08
VARIABLE jobno number;
BEGIN
DBMS_JOB.SUBMIT(:jobno,'rel_kw_sync_index();',
SYSDATE, 'SYSDATE + (1/24/4)');
commit;
END;
/
--根据行业和大洲进行匹配
DROP TABLE iquery.lpx_kw_tmp03;
CREATE TABLE iquery.lpx_kw_tmp03(
KW VARCHAR2(256)
,REGIN_NAME VARCHAR2(128)
,CATEGORY_ROOT_ID NUMBER
,SRH_PV_THIS_5MON NUMBER
);
set serveroutput ON;
DECLARE
cnt NUMBER;
start_date DATE;
end_date DATE;
CURSOR c1
IS
select REPLACE(REPLACE(kw, '-', '/-'), '&', '/&') AS kw, kw AS kw_bak, keywords, regin_name, category_root_id FROM iquery.lpx_kw_test;
BEGIN
cnt := 0;
start_date := SYSDATE;
FOR ref_cursor IN c1
LOOP
INSERT INTO iquery.lpx_kw_tmp03
SELECT ref_cursor.kw_bak
,ref_cursor.regin_name
,ref_cursor.category_root_id
,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
FROM iquery.lpx_kw_tmp02_bak2 a
WHERE contains(a.keywords,ref_cursor.kw) > 0 --走全文索引
AND a.category_root_id = ref_cursor.category_root_id --根级行业
AND a.regin_name = ref_cursor.regin_name --买家所在大洲
GROUP BY ref_cursor.kw_bak
,ref_cursor.regin_name
,ref_cursor.category_root_id;
COMMIT;
cnt := cnt + 1;
END LOOP;
end_date := SYSDATE;
DBMS_OUTPUT.PUT_LINE('total records:'||cnt);
DBMS_OUTPUT.PUT_LINE('total time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000));
DBMS_OUTPUT.PUT_LINE('avg time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000)/cnt);
END;
/
--有全文索引的性能
total records:1577
total time(ms):9000
avg time(ms):5.70703868103994927076727964489537095751
PL/SQL procedure successfully completed.