machine: 4核16G*3
nodes:3
stream_616d2986bd17682e66aedbb3: 899035 Lines
stream_617fc79a355b7f54684a9c8b: 2693935 Lines
ClickHouse Add Secondary Index
ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]
and if table has data, need to do
OPTIMIZE TABLE table_name FINAL
Taxonomy (default)
stream_616d2986bd17682e66aedbb3 | ClickHouse Cluster (/ms) | PGPool (/ms) |
---|---|---|
Default | 261 | |
Add Index | 208 | 1634.967 (optimized query) |
Add Index+View Index | 106 | 449.933 (optimized query) |
Default(optimized query) | 52 |
stream_617fc79a355b7f54684a9c8b | ClickHouse Cluster (/ms) | PGPool (/ms) |
---|---|---|
Default | 229 | |
Add Index | 305 | |
Add Index+View Index | 253 | |
Default(optimize query) | 30 |
clickhouse cluster
default
SELECT
col0 AS diffcol0,
count(*) AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0,
(
SELECT
tab0._id,
arrayJoin(arrayJoin(`f_7d1_ff35_t_labels$textual`)) AS col0
FROM stream_616d2986bd17682e66aedbb3_cluster AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50
Query id: d92e1b4c-116d-4636-ae02-3fd134827ab6
┌─diffcol0────────────┬───count─┐
│ 消费洞察话题 │ 1622047 │
│ 产品体验 │ 904200 │
│ 品牌/营销 │ 562340 │
│ 味道 │ 442253 │
│ 味道好坏 │ 380336 │
└─────────────────────┴─────────┘
50 rows in set. Elapsed: 0.261 sec. Processed 1.80 million rows, 172.06 MB (6.90 million rows/s., 659.83 MB/s.)
SELECT
col0 AS diffcol0,
count(*) AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0,
(
SELECT
tab0._id,
arrayJoin(arrayJoin(`f_aq2_20f9_t_labels$textual`)) AS col0
FROM stream_617fc79a355b7f54684a9c8b_node AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50
Query id: 78876d0e-1dd1-4eac-8546-3d371a4f28cd
┌─diffcol0───┬───count─┐
│ │ 2604525 │
│ 场景和受众 │ 102057 │
│ 受众人群 │ 76454 │
│ 孩子 │ 46013 │
│ 使用场景 │ 25603 │
└────────────┴─────────┘
14 rows in set. Elapsed: 0.229 sec. Processed 5.39 million rows, 94.37 MB (23.51 million rows/s., 411.86 MB/s.)
Add Table Index
SELECT
col0 AS diffcol0,
count(*) AS count
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0,
(
SELECT
tab0._id,
arrayJoin(arrayJoin(`f_7d1_ff35_t_labels$textual`)) AS col0
FROM stream_616d2986bd17682e66aedbb3_cluster_index AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50
Query id: 9ed1734c-7f46-47d4-b400-9adebd73d2b6
┌─diffcol0────────────┬───count─┐
│ 消费洞察话题 │ 1622047 │
│ 产品体验 │ 904200 │
│ 品牌/营销 │ 562340 │
│ 味道 │ 442253 │
│ 味道好坏 │ 380336 │
└─────────────────────┴─────────┘
50 rows in set. Elapsed: 0.208 sec. Processed 1.80 million rows, 172.06 MB (8.66 million rows/s., 829.05 MB/s.)
SELECT
col0 AS diffcol0,
count(*) AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
SELECT
tab0._id,
arrayJoin(arrayJoin(`f_aq2_20f9_t_labels$textual`)) AS col0
FROM stream_617fc79a355b7f54684a9c8b_node_index AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50
Query id: 49b6e9fe-55eb-472b-bfe7-8f6d831bc81a
┌─diffcol0───┬───count─┐
│ │ 2604525 │
│ 场景和受众 │ 102057 │
│ 受众人群 │ 76454 │
│ 孩子 │ 46013 │
│ 使用场景 │ 25603 │
└────────────┴─────────┘
Add Table Index+View Index
SELECT
vw_unnested AS diffcol0,
count(*) AS count
FROM `vw_s_dbb3_f_7d1_ff35_t_labels$textual_cluster`
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50
Query id: 5e0505be-ca2a-40f2-a82f-56984230dfce
┌─diffcol0────────────┬───count─┐
│ 消费洞察话题 │ 1622047 │
│ 产品体验 │ 904200 │
│ 品牌/营销 │ 562340 │
│ 味道 │ 442253 │
│ 味道好坏 │ 380336 │
└─────────────────────┴─────────┘
50 rows in set. Elapsed: 0.106 sec. Processed 6.68 million rows, 143.19 MB (62.82 million rows/s., 1.35 GB/s.)
SELECT
col0 AS diffcol0,
count(*) AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
SELECT
_id,
vw_unnested AS col0
FROM `vw_s_9c8b_f_aq2_20f9_t_labels$textual_c_index` AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50
Query id: 61c537ce-0663-4ef4-bfeb-2054977b6bfa
┌─diffcol0───┬───count─┐
│ │ 2604525 │
│ 场景和受众 │ 102057 │
│ 受众人群 │ 76454 │
│ 孩子 │ 46013 │
│ 使用场景 │ 25603 │
└────────────┴─────────┘
14 rows in set. Elapsed: 0.253 sec. Processed 5.60 million rows, 52.03 MB (22.19 million rows/s., 206.01 MB/s.)
Default (optimize query)
SELECT
arrayJoin(arrayJoin(`f_7d1_ff35_t_labels$textual`)) AS diffcol0,
count(*) AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50
Query id: 72f79358-f560-4ca2-a217-40deb51f9738
┌─diffcol0────────────┬───count─┐
│ 消费洞察话题 │ 1622047 │
│ 产品体验 │ 904200 │
│ 品牌/营销 │ 562340 │
│ 味道 │ 442253 │
│ 味道好坏 │ 380336 │
└─────────────────────┴─────────┘
50 rows in set. Elapsed: 0.052 sec. Processed 899.03 thousand rows, 164.87 MB (17.18 million rows/s., 3.15 GB/s.)
SELECT
arrayJoin(arrayJoin(`f_aq2_20f9_t_labels$textual`)) AS diffcol0,
count(*) AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50
Query id: f21f7a45-c338-45aa-b9a5-86e6cf73dd3f
┌─diffcol0───┬───count─┐
│ │ 2604525 │
│ 场景和受众 │ 102057 │
│ 受众人群 │ 76454 │
│ 孩子 │ 46013 │
│ 使用场景 │ 25603 │
│ 早餐 │ 12560 │
└────────────┴─────────┘
14 rows in set. Elapsed: 0.030 sec. Processed 2.69 million rows, 72.82 MB (90.60 million rows/s., 2.45 GB/s.)
Taxonomy (ngrams)
stream_616d2986bd17682e66aedbb3 | ClickHouse Cluster (/ms) | PGPool (/ms) |
---|---|---|
Default | 110 | |
Add Index | 109 | 3667.772 |
Add Index+View Index | 139 | 787.751 |
Default(optimize query) | 50 |
stream_617fc79a355b7f54684a9c8b | ClickHouse Cluster (/ms) | PGPool (/ms) |
---|---|---|
Default | 186 | |
Add Index | 197 | |
Add Index+View Index | 182 | |
Default(optimize query) | 58 |
clickhouse cluster
default
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0,
(
SELECT
tab0._id,
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
FROM stream_616d2986bd17682e66aedbb3_cluster AS tab0
) AS tab1
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 7d35386d-9577-461e-a418-159dee742060
┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状'] │ 4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小'] │ 2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性'] │ 2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好'] │ 1221 │
└──────────────────────────────────────────────────────────┴───────┘
125 rows in set. Elapsed: 0.110 sec. Processed 1.80 million rows, 205.28 MB (16.40 million rows/s., 1.87 GB/s.)
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0,
(
SELECT
tab0._id,
arrayJoin(`f_aq2_20f9_t_labels$textual`) AS col0
FROM stream_617fc79a355b7f54684a9c8b_node AS tab0
) AS tab1
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 4309106d-64c9-4e6e-a204-cb5d5149760d
┌─diffcol0─────────────────────────┬─count─┐
│ [''] │ 17795 │
│ ['场景和受众','使用场景','早餐'] │ 2 │
│ ['场景和受众','使用场景','送礼'] │ 2 │
│ ['场景和受众','受众人群','女性'] │ 2 │
│ ['场景和受众','受众人群','孩子'] │ 32 │
└──────────────────────────────────┴───────┘
6 rows in set. Elapsed: 0.186 sec. Processed 5.39 million rows, 161.97 MB (28.97 million rows/s., 870.90 MB/s.)
Add Table Index
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0,
(
SELECT
tab0._id,
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
FROM stream_616d2986bd17682e66aedbb3_cluster_index AS tab0
) AS tab1
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 1843fa87-e107-4200-9dff-5871c388040f
┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状'] │ 4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小'] │ 2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性'] │ 2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好'] │ 1221 │
└──────────────────────────────────────────────────────────┴───────┘
125 rows in set. Elapsed: 0.109 sec. Processed 1.80 million rows, 205.28 MB (16.44 million rows/s., 1.88 GB/s.)
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
SELECT
tab0._id,
arrayJoin(`f_aq2_20f9_t_labels$textual`) AS col0
FROM stream_617fc79a355b7f54684a9c8b_node_index AS tab0
) AS tab1
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: b98635af-40ca-4fea-aab8-fa7ecde3fe8a
┌─diffcol0─────────────────────────┬─count─┐
│ [''] │ 17795 │
│ ['场景和受众','使用场景','早餐'] │ 2 │
│ ['场景和受众','使用场景','送礼'] │ 2 │
│ ['场景和受众','受众人群','女性'] │ 2 │
│ ['场景和受众','受众人群','孩子'] │ 32 │
└──────────────────────────────────┴───────┘
6 rows in set. Elapsed: 0.197 sec. Processed 5.39 million rows, 159.14 MB (27.34 million rows/s., 807.65 MB/s.)
Add Table Index+View Index
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0,
(
SELECT
tab0._id,
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
FROM stream_616d2986bd17682e66aedbb3_cluster_index AS tab0
) AS tab1
WHERE (tab1._id IN (
SELECT _id
FROM stream_ngrams
WHERE hasAny(unnested, ['日期_新鲜', '值得_信赖'])
)) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: cef4c535-8820-4ed9-b0ce-20d171e844ca
┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状'] │ 4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小'] │ 2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性'] │ 2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好'] │ 1221 │
└──────────────────────────────────────────────────────────┴───────┘
125 rows in set. Elapsed: 0.139 sec. Processed 1.80 million rows, 172.06 MB (12.98 million rows/s., 1.24 GB/s
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
SELECT
_id,
vw_unnested AS col0
FROM `vw_s_9c8b_f_aq2_20f9_t_labels$textual_tax_c_index` AS tab0
) AS tab1
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: be91e8da-7beb-4a06-9515-e2e95a985d35
┌─diffcol0─────────────────────────┬─count─┐
│ [''] │ 17795 │
│ ['场景和受众','使用场景','早餐'] │ 2 │
│ ['场景和受众','使用场景','送礼'] │ 2 │
│ ['场景和受众','受众人群','女性'] │ 2 │
│ ['场景和受众','受众人群','孩子'] │ 32 │
└──────────────────────────────────┴───────┘
6 rows in set. Elapsed: 0.182 sec. Processed 5.40 million rows, 137.64 MB (29.66 million rows/s., 755.90 MB/s.)
Default(optimize query)
SELECT
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS diffcol0,
count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE hasAny(tab0.`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖'])
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 7a361ad9-0637-4a1b-b639-1e0f5523fb2b
┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状'] │ 4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小'] │ 2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性'] │ 2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好'] │ 1221 │
└──────────────────────────────────────────────────────────┴───────┘
125 rows in set. Elapsed: 0.050 sec. Processed 899.03 thousand rows, 198.09 MB (17.93 million rows/s., 3.95 GB/s.)
SELECT
arrayJoin(`f_aq2_20f9_t_labels$textual`) AS diffcol0,
count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物'])
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 8a8d5919-b594-481b-9f7b-33d6dd0e8f51
┌─diffcol0─────────────────────────┬─count─┐
│ [''] │ 17795 │
│ ['场景和受众','使用场景','早餐'] │ 2 │
│ ['场景和受众','使用场景','送礼'] │ 2 │
│ ['场景和受众','受众人群','女性'] │ 2 │
│ ['场景和受众','受众人群','孩子'] │ 32 │
└──────────────────────────────────┴───────┘
6 rows in set. Elapsed: 0.058 sec. Processed 2.69 million rows, 140.41 MB (46.17 million rows/s., 2.41 GB/s.)
Taxonomy node ngram
stream_616d2986bd17682e66aedbb3 | ClickHouse Cluster (/ms) | PGPool (/ms) |
---|---|---|
Default | 331 | |
Add Index | 335 | 1186.529 |
Default(optimize query) | 120 |
stream_617fc79a355b7f54684a9c8b | ClickHouse Cluster (/ms) | PGPool (/ms) |
---|---|---|
Default | 100 | |
Add Index | 102 |
clickhouse cluster
default
SELECT count(*)
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id IN (
SELECT al._id
FROM
(
SELECT
unwhe._id,
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS oo,
`f_7d1_ff35_t_labels$textual` AS af
FROM stream_616d2986bd17682e66aedbb3_cluster AS unwhe
) AS al
WHERE ((oo[1]) = '消费洞察话题') AND (_id = al._id) AND hasAll(arrayFlatten(af), ['消费洞察话题'])
))
Query id: 08937001-171f-4648-a57d-a68ab462aad1
┌─count()─┐
│ 19314 │
└─────────┘
1 rows in set. Elapsed: 0.331 sec. Processed 899.03 thousand rows, 36.81 MB (2.72 million rows/s., 111.28 MB/s.)
SELECT count(*)
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (_id IN (
SELECT al._id
FROM
(
SELECT
unwhe._id,
arrayJoin(`f_aq2_20f9_t_labels$textual`) AS oo,
`f_aq2_20f9_t_labels$textual` AS af
FROM stream_617fc79a355b7f54684a9c8b_node AS unwhe
) AS al
WHERE ((oo[1]) = '场景和受众') AND (_id = al._id) AND hasAll(arrayFlatten(af), ['场景和受众'])
))
Query id: 7869d92a-b699-4aee-90ae-df1d38676cef
┌─count()─┐
│ 41 │
└─────────┘
1 rows in set. Elapsed: 0.100 sec. Processed 2.69 million rows, 78.37 MB (26.89 million rows/s., 782.14 MB/s.)
Add Table Index
SELECT count(*)
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id IN (
SELECT al._id
FROM
(
SELECT
unwhe._id,
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS oo,
`f_7d1_ff35_t_labels$textual` AS af
FROM stream_616d2986bd17682e66aedbb3_cluster_index AS unwhe
) AS al
WHERE ((oo[1]) = '消费洞察话题') AND (_id = al._id) AND hasAll(arrayFlatten(af), ['消费洞察话题'])
))
Query id: 5d1ef23c-db6a-436e-a56e-9284abbed19c
┌─count()─┐
│ 19314 │
└─────────┘
1 rows in set. Elapsed: 0.335 sec. Processed 899.02 thousand rows, 36.81 MB (2.68 million rows/s., 109.76 MB/s.)
SELECT count(*)
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (_id IN (
SELECT al._id
FROM
(
SELECT
unwhe._id,
arrayJoin(`f_aq2_20f9_t_labels$textual`) AS oo,
`f_aq2_20f9_t_labels$textual` AS af
FROM stream_617fc79a355b7f54684a9c8b_node_index AS unwhe
) AS al
WHERE ((oo[1]) = '场景和受众') AND (_id = al._id) AND hasAll(arrayFlatten(af), ['场景和受众'])
))
Query id: 448f0888-233f-4487-a407-efd44ca9666e
┌─count()─┐
│ 41 │
└─────────┘
1 rows in set. Elapsed: 0.102 sec. Processed 2.69 million rows, 75.55 MB (26.39 million rows/s., 740.16 MB/s.)
Default (optimize query)
SELECT count(*)
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id IN (
SELECT _id
FROM stream_616d2986bd17682e66aedbb3_cluster
WHERE ((arrayJoin(`f_7d1_ff35_t_labels$textual`)[1]) = '消费洞察话题') AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题'])
))
Query id: 384305f9-f050-48e6-90b4-c70191be4bcc
┌─count()─┐
│ 19314 │
└─────────┘
1 rows in set. Elapsed: 0.110 sec. Processed 899.03 thousand rows, 36.81 MB (8.18 million rows/s., 334.82 MB/s.)
ch1 :) SELECT count(*) FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0 WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id in (select _id from stream_616d2986bd17682e66aedbb3_cluster where arrayJoin(`f_7d1_ff35_t_labels$textual`)[1]= '消费洞察话题' AND hasAll(arrayFlatten(f_7d1_ff35_t_labels$textual), ['消费洞察话题'])))
SELECT count(*)
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id IN (
SELECT _id
FROM stream_616d2986bd17682e66aedbb3_cluster
WHERE ((arrayJoin(`f_7d1_ff35_t_labels$textual`)[1]) = '消费洞察话题') AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题'])
))
Query id: 832de13b-21bf-4730-af9a-159ba058b186
┌─count()─┐
│ 19314 │
└─────────┘
1 rows in set. Elapsed: 0.120 sec. Processed 899.03 thousand rows, 36.81 MB (7.48 million rows/s., 306.30 MB/s.)
Taxonomy node ngrams
stream_616d2986bd17682e66aedbb3 | ClickHouse Cluster (/ms) | PGPool (/ms) |
---|---|---|
Default | 453 | |
Add Index | 417 | 3689.966 |
Add Index + View | 1621.117 | |
Default(optimize query) | 53 |
stream_617fc79a355b7f54684a9c8b | ClickHouse Cluster (/ms) | PGPool (/ms) |
---|---|---|
Default | 245 | |
Add Index | 241 | |
Add Index + View |
clickhouse cluster
default
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0,
(
SELECT
tab0._id,
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
FROM stream_616d2986bd17682e66aedbb3_cluster AS tab0
) AS tab1
WHERE ((tab0._id IN (
SELECT _id
FROM
(
SELECT
unwhe._id,
`f_7d1_ff35_t_labels$textual`,
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS oo
FROM stream_616d2986bd17682e66aedbb3_cluster AS unwhe
) AS al
WHERE ((oo[1]) = '消费洞察话题') AND (_id = al._id) AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题'])
)) AND hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖'])) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: bdfbbb01-9ec9-450d-bf43-ae734135aca6
┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状'] │ 4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小'] │ 2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性'] │ 2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好'] │ 1221 │
└──────────────────────────────────────────────────────────┴───────┘
125 rows in set. Elapsed: 0.453 sec. Processed 1.80 million rows, 205.28 MB (3.97 million rows/s., 452.99 MB/s.)
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0,
(
SELECT
tab0._id,
arrayJoin(`f_aq2_20f9_t_labels$textual`) AS col0
FROM stream_617fc79a355b7f54684a9c8b_node AS tab0
) AS tab1
WHERE ((tab0._id IN (
SELECT _id
FROM
(
SELECT
unwhe._id,
`f_aq2_20f9_t_labels$textual`,
arrayJoin(`f_aq2_20f9_t_labels$textual`) AS oo
FROM stream_617fc79a355b7f54684a9c8b_node AS unwhe
) AS al
WHERE ((oo[1]) = '场景和受众') AND (_id = al._id) AND hasAll(arrayFlatten(`f_aq2_20f9_t_labels$textual`), ['场景和受众'])
)) AND hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物'])) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: af92eb8c-6231-43b4-846b-1df8588b0bef
┌─diffcol0─────────────────────────┬─count─┐
│ ['场景和受众','使用场景','早餐'] │ 2 │
│ ['场景和受众','使用场景','送礼'] │ 2 │
│ ['场景和受众','受众人群','女性'] │ 2 │
│ ['场景和受众','受众人群','孩子'] │ 32 │
│ ['场景和受众','受众人群','老人'] │ 3 │
└──────────────────────────────────┴───────┘
5 rows in set. Elapsed: 0.245 sec. Processed 5.39 million rows, 161.96 MB (22.00 million rows/s., 661.20 MB/s.)
Add Table Index
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0,
(
SELECT
tab0._id,
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
FROM stream_616d2986bd17682e66aedbb3_cluster_index AS tab0
) AS tab1
WHERE ((tab0._id IN (
SELECT _id
FROM
(
SELECT
unwhe._id,
`f_7d1_ff35_t_labels$textual`,
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS oo
FROM stream_616d2986bd17682e66aedbb3_cluster_index AS unwhe
) AS al
WHERE ((oo[1]) = '消费洞察话题') AND (_id = al._id) AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题'])
)) AND hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖'])) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 3f4e58b3-5459-4068-8528-ab6dc73b2a75
┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状'] │ 4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小'] │ 2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性'] │ 2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好'] │ 1221 │
└──────────────────────────────────────────────────────────┴───────┘
125 rows in set. Elapsed: 0.417 sec. Processed 1.80 million rows, 205.28 MB (4.31 million rows/s., 492.56 MB/s.)
SELECT
col0 AS diffcol0,
count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
SELECT
tab0._id,
arrayJoin(`f_aq2_20f9_t_labels$textual`) AS col0
FROM stream_617fc79a355b7f54684a9c8b_node_index AS tab0
) AS tab1
WHERE ((tab0._id IN (
SELECT _id
FROM
(
SELECT
unwhe._id,
`f_aq2_20f9_t_labels$textual`,
arrayJoin(`f_aq2_20f9_t_labels$textual`) AS oo
FROM stream_617fc79a355b7f54684a9c8b_node_index AS unwhe
) AS al
WHERE ((oo[1]) = '场景和受众') AND (_id = al._id) AND hasAll(arrayFlatten(`f_aq2_20f9_t_labels$textual`), ['场景和受众'])
)) AND hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物'])) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 2dd710bc-15ef-4479-837d-5da7fe2be7d7
┌─diffcol0─────────────────────────┬─count─┐
│ ['场景和受众','使用场景','早餐'] │ 2 │
│ ['场景和受众','使用场景','送礼'] │ 2 │
│ ['场景和受众','受众人群','女性'] │ 2 │
│ ['场景和受众','受众人群','孩子'] │ 32 │
│ ['场景和受众','受众人群','老人'] │ 3 │
└──────────────────────────────────┴───────┘
5 rows in set. Elapsed: 0.241 sec. Processed 5.39 million rows, 159.14 MB (22.34 million rows/s., 659.91 MB/s.)
Default (optimize query)
SELECT
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS diffcol0,
count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE ((diffcol0[1]) = '消费洞察话题') AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题']) AND hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖'])
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 90c63d4d-cc58-436b-8d31-b5445086b367
┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状'] │ 4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小'] │ 2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性'] │ 2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好'] │ 1221 │
└──────────────────────────────────────────────────────────┴───────┘
125 rows in set. Elapsed: 0.053 sec. Processed 899.03 thousand rows, 198.09 MB (16.92 million rows/s., 3.73 GB/s.)
Taxonomy score default
stream_616d2986bd17682e66aedbb3 | ClickHouse Cluster (/ms) | PGPool (/ms) |
---|---|---|
Default (optimize query) | 113 | |
Add Index (optimize query) | 125 | 28073.313 !!! |
clickhouse cluster
Default(optimize query)
SELECT
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS diffcol0,
arrayJoin(`f_8d1_ff35_t_scores$numerical`) AS diffcolnest1,
count('*') AS col2
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
GROUP BY
diffcol0,
diffcolnest1
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 276530b7-0988-4a15-8e2d-95fa105fe2ef
┌─diffcol0──────────────────────────────────────────────┬─diffcolnest1─┬───col2─┐
│ [''] │ ᴺᵁᴸᴸ │ 189159 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ -3 │ 465 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 3 │ 404 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 1 │ 943 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 2 │ 808 │
└───────────────────────────────────────────────────────┴──────────────┴────────┘
200 rows in set. Elapsed: 0.113 sec. Processed 899.03 thousand rows, 181.12 MB (7.96 million rows/s., 1.60 GB/s.)
Add Table Index
SELECT
arrayJoin(`f_7d1_ff35_t_labels$textual`) AS diffcol0,
arrayJoin(`f_8d1_ff35_t_scores$numerical`) AS diffcolnest1,
count('*') AS col2
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
GROUP BY
diffcol0,
diffcolnest1
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200
Query id: 276530b7-0988-4a15-8e2d-95fa105fe2ef
┌─diffcol0──────────────────────────────────────────────┬─diffcolnest1─┬───col2─┐
│ [''] │ ᴺᵁᴸᴸ │ 189159 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ -3 │ 465 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 3 │ 404 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 1 │ 943 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性'] │ 2 │ 808 │
└───────────────────────────────────────────────────────┴──────────────┴────────┘
200 rows in set. Elapsed: 0.113 sec. Processed 899.03 thousand rows, 181.12 MB (7.96 million rows/s., 1.60 GB/s.)