ClickHouse Cluster vs PGPool

machine: 4核16G*3

nodes:3

stream_616d2986bd17682e66aedbb3: 899035 Lines

stream_617fc79a355b7f54684a9c8b: 2693935 Lines

ClickHouse Add Secondary Index

ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]

and if table has data, need to do

OPTIMIZE TABLE table_name FINAL

Clickhouse:如何在 clickhouse 中使用 `Data Skipping Indexes` 和 `Manipulations With Data Skipping Indices` 功能

Taxonomy (default)

stream_616d2986bd17682e66aedbb3ClickHouse Cluster (/ms)PGPool (/ms)
Default261
Add Index2081634.967 (optimized query)
Add Index+View Index106449.933 (optimized query)
Default(optimized query)52
stream_617fc79a355b7f54684a9c8bClickHouse Cluster (/ms)PGPool (/ms)
Default229
Add Index305
Add Index+View Index253
Default(optimize query)30

clickhouse cluster

default

SELECT
    col0 AS diffcol0,
    count(*) AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(arrayJoin(`f_7d1_ff35_t_labels$textual`)) AS col0
    FROM stream_616d2986bd17682e66aedbb3_cluster AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50

Query id: d92e1b4c-116d-4636-ae02-3fd134827ab6

┌─diffcol0────────────┬───count─┐
│ 消费洞察话题        │ 1622047 │
│ 产品体验            │  904200 │
│ 品牌/营销           │  562340 │
│ 味道                │  442253 │
│ 味道好坏            │  380336 │
└─────────────────────┴─────────┘

50 rows in set. Elapsed: 0.261 sec. Processed 1.80 million rows, 172.06 MB (6.90 million rows/s., 659.83 MB/s.)

SELECT
    col0 AS diffcol0,
    count(*) AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(arrayJoin(`f_aq2_20f9_t_labels$textual`)) AS col0
    FROM stream_617fc79a355b7f54684a9c8b_node AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50

Query id: 78876d0e-1dd1-4eac-8546-3d371a4f28cd

┌─diffcol0───┬───count─┐
│            │ 2604525 │
│ 场景和受众 │  102057 │
│ 受众人群   │   76454 │
│ 孩子       │   46013 │
│ 使用场景   │   25603 │
└────────────┴─────────┘

14 rows in set. Elapsed: 0.229 sec. Processed 5.39 million rows, 94.37 MB (23.51 million rows/s., 411.86 MB/s.)

Add Table Index

SELECT
    col0 AS diffcol0,
    count(*) AS count
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(arrayJoin(`f_7d1_ff35_t_labels$textual`)) AS col0
    FROM stream_616d2986bd17682e66aedbb3_cluster_index AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50

Query id: 9ed1734c-7f46-47d4-b400-9adebd73d2b6

┌─diffcol0────────────┬───count─┐
│ 消费洞察话题        │ 1622047 │
│ 产品体验            │  904200 │
│ 品牌/营销           │  562340 │
│ 味道                │  442253 │
│ 味道好坏            │  380336 │
└─────────────────────┴─────────┘

50 rows in set. Elapsed: 0.208 sec. Processed 1.80 million rows, 172.06 MB (8.66 million rows/s., 829.05 MB/s.)

SELECT
    col0 AS diffcol0,
    count(*) AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(arrayJoin(`f_aq2_20f9_t_labels$textual`)) AS col0
    FROM stream_617fc79a355b7f54684a9c8b_node_index AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50

Query id: 49b6e9fe-55eb-472b-bfe7-8f6d831bc81a

┌─diffcol0───┬───count─┐
│            │ 2604525 │
│ 场景和受众 │  102057 │
│ 受众人群   │   76454 │
│ 孩子       │   46013 │
│ 使用场景   │   25603 │
└────────────┴─────────┘

Add Table Index+View Index

SELECT
    vw_unnested AS diffcol0,
    count(*) AS count
FROM `vw_s_dbb3_f_7d1_ff35_t_labels$textual_cluster`
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50

Query id: 5e0505be-ca2a-40f2-a82f-56984230dfce

┌─diffcol0────────────┬───count─┐
│ 消费洞察话题        │ 1622047 │
│ 产品体验            │  904200 │
│ 品牌/营销           │  562340 │
│ 味道                │  442253 │
│ 味道好坏            │  380336 │
└─────────────────────┴─────────┘

50 rows in set. Elapsed: 0.106 sec. Processed 6.68 million rows, 143.19 MB (62.82 million rows/s., 1.35 GB/s.)
SELECT
    col0 AS diffcol0,
    count(*) AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
    SELECT
        _id,
        vw_unnested AS col0
    FROM `vw_s_9c8b_f_aq2_20f9_t_labels$textual_c_index` AS tab0
) AS tab1
WHERE tab1._id = tab0._id
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50

Query id: 61c537ce-0663-4ef4-bfeb-2054977b6bfa

┌─diffcol0───┬───count─┐
│            │ 2604525 │
│ 场景和受众 │  102057 │
│ 受众人群   │   76454 │
│ 孩子       │   46013 │
│ 使用场景   │   25603 │
└────────────┴─────────┘

14 rows in set. Elapsed: 0.253 sec. Processed 5.60 million rows, 52.03 MB (22.19 million rows/s., 206.01 MB/s.)

Default (optimize query)

SELECT
    arrayJoin(arrayJoin(`f_7d1_ff35_t_labels$textual`)) AS diffcol0,
    count(*) AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50

Query id: 72f79358-f560-4ca2-a217-40deb51f9738

┌─diffcol0────────────┬───count─┐
│ 消费洞察话题        │ 1622047 │
│ 产品体验            │  904200 │
│ 品牌/营销           │  562340 │
│ 味道                │  442253 │
│ 味道好坏            │  380336 │
└─────────────────────┴─────────┘

50 rows in set. Elapsed: 0.052 sec. Processed 899.03 thousand rows, 164.87 MB (17.18 million rows/s., 3.15 GB/s.)
SELECT
    arrayJoin(arrayJoin(`f_aq2_20f9_t_labels$textual`)) AS diffcol0,
    count(*) AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0
GROUP BY diffcol0
ORDER BY count DESC NULLS LAST
LIMIT 0, 50

Query id: f21f7a45-c338-45aa-b9a5-86e6cf73dd3f

┌─diffcol0───┬───count─┐
│            │ 2604525 │
│ 场景和受众 │  102057 │
│ 受众人群   │   76454 │
│ 孩子       │   46013 │
│ 使用场景   │   25603 │
│ 早餐       │   12560 │
└────────────┴─────────┘

14 rows in set. Elapsed: 0.030 sec. Processed 2.69 million rows, 72.82 MB (90.60 million rows/s., 2.45 GB/s.)

Taxonomy (ngrams)

stream_616d2986bd17682e66aedbb3ClickHouse Cluster (/ms)PGPool (/ms)
Default110
Add Index1093667.772
Add Index+View Index139787.751
Default(optimize query)50
stream_617fc79a355b7f54684a9c8bClickHouse Cluster (/ms)PGPool (/ms)
Default186
Add Index197
Add Index+View Index182
Default(optimize query)58

clickhouse cluster

default

SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
    FROM stream_616d2986bd17682e66aedbb3_cluster AS tab0
) AS tab1
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 7d35386d-9577-461e-a418-159dee742060

┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性']29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状']4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小']2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性']2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好']1221 │
└──────────────────────────────────────────────────────────┴───────┘

125 rows in set. Elapsed: 0.110 sec. Processed 1.80 million rows, 205.28 MB (16.40 million rows/s., 1.87 GB/s.)
SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(`f_aq2_20f9_t_labels$textual`) AS col0
    FROM stream_617fc79a355b7f54684a9c8b_node AS tab0
) AS tab1
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 4309106d-64c9-4e6e-a204-cb5d5149760d

┌─diffcol0─────────────────────────┬─count─┐
│ ['']17795 │
│ ['场景和受众','使用场景','早餐']2 │
│ ['场景和受众','使用场景','送礼']2 │
│ ['场景和受众','受众人群','女性']2 │
│ ['场景和受众','受众人群','孩子']32 │
└──────────────────────────────────┴───────┘

6 rows in set. Elapsed: 0.186 sec. Processed 5.39 million rows, 161.97 MB (28.97 million rows/s., 870.90 MB/s.)

Add Table Index

SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
    FROM stream_616d2986bd17682e66aedbb3_cluster_index AS tab0
) AS tab1
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 1843fa87-e107-4200-9dff-5871c388040f

┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性']29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状']4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小']2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性']2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好']1221 │
└──────────────────────────────────────────────────────────┴───────┘

125 rows in set. Elapsed: 0.109 sec. Processed 1.80 million rows, 205.28 MB (16.44 million rows/s., 1.88 GB/s.)
SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(`f_aq2_20f9_t_labels$textual`) AS col0
    FROM stream_617fc79a355b7f54684a9c8b_node_index AS tab0
) AS tab1
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: b98635af-40ca-4fea-aab8-fa7ecde3fe8a

┌─diffcol0─────────────────────────┬─count─┐
│ ['']17795 │
│ ['场景和受众','使用场景','早餐']2 │
│ ['场景和受众','使用场景','送礼']2 │
│ ['场景和受众','受众人群','女性']2 │
│ ['场景和受众','受众人群','孩子']32 │
└──────────────────────────────────┴───────┘

6 rows in set. Elapsed: 0.197 sec. Processed 5.39 million rows, 159.14 MB (27.34 million rows/s., 807.65 MB/s.)

Add Table Index+View Index

SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
    FROM stream_616d2986bd17682e66aedbb3_cluster_index AS tab0
) AS tab1
WHERE (tab1._id IN (
    SELECT _id
    FROM stream_ngrams
    WHERE hasAny(unnested, ['日期_新鲜', '值得_信赖'])
)) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: cef4c535-8820-4ed9-b0ce-20d171e844ca

┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性']29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状']4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小']2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性']2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好']1221 │
└──────────────────────────────────────────────────────────┴───────┘

125 rows in set. Elapsed: 0.139 sec. Processed 1.80 million rows, 172.06 MB (12.98 million rows/s., 1.24 GB/s
SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
    SELECT
        _id,
        vw_unnested AS col0
    FROM `vw_s_9c8b_f_aq2_20f9_t_labels$textual_tax_c_index` AS tab0
) AS tab1
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: be91e8da-7beb-4a06-9515-e2e95a985d35

┌─diffcol0─────────────────────────┬─count─┐
│ ['']17795 │
│ ['场景和受众','使用场景','早餐']2 │
│ ['场景和受众','使用场景','送礼']2 │
│ ['场景和受众','受众人群','女性']2 │
│ ['场景和受众','受众人群','孩子']32 │
└──────────────────────────────────┴───────┘

6 rows in set. Elapsed: 0.182 sec. Processed 5.40 million rows, 137.64 MB (29.66 million rows/s., 755.90 MB/s.)

Default(optimize query)

SELECT
    arrayJoin(`f_7d1_ff35_t_labels$textual`) AS diffcol0,
    count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE hasAny(tab0.`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖'])
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 7a361ad9-0637-4a1b-b639-1e0f5523fb2b

┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性']29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状']4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小']2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性']2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好']1221 │
└──────────────────────────────────────────────────────────┴───────┘

125 rows in set. Elapsed: 0.050 sec. Processed 899.03 thousand rows, 198.09 MB (17.93 million rows/s., 3.95 GB/s.)
SELECT
    arrayJoin(`f_aq2_20f9_t_labels$textual`) AS diffcol0,
    count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物'])
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 8a8d5919-b594-481b-9f7b-33d6dd0e8f51

┌─diffcol0─────────────────────────┬─count─┐
│ ['']17795 │
│ ['场景和受众','使用场景','早餐']2 │
│ ['场景和受众','使用场景','送礼']2 │
│ ['场景和受众','受众人群','女性']2 │
│ ['场景和受众','受众人群','孩子']32 │
└──────────────────────────────────┴───────┘

6 rows in set. Elapsed: 0.058 sec. Processed 2.69 million rows, 140.41 MB (46.17 million rows/s., 2.41 GB/s.)

Taxonomy node ngram

stream_616d2986bd17682e66aedbb3ClickHouse Cluster (/ms)PGPool (/ms)
Default331
Add Index3351186.529
Default(optimize query)120
stream_617fc79a355b7f54684a9c8bClickHouse Cluster (/ms)PGPool (/ms)
Default100
Add Index102

clickhouse cluster

default

SELECT count(*)
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id IN (
    SELECT al._id
    FROM
    (
        SELECT
            unwhe._id,
            arrayJoin(`f_7d1_ff35_t_labels$textual`) AS oo,
            `f_7d1_ff35_t_labels$textual` AS af
        FROM stream_616d2986bd17682e66aedbb3_cluster AS unwhe
    ) AS al
    WHERE ((oo[1]) = '消费洞察话题') AND (_id = al._id) AND hasAll(arrayFlatten(af), ['消费洞察话题'])
))

Query id: 08937001-171f-4648-a57d-a68ab462aad1

┌─count()─┐
│   19314 │
└─────────┘

1 rows in set. Elapsed: 0.331 sec. Processed 899.03 thousand rows, 36.81 MB (2.72 million rows/s., 111.28 MB/s.)
SELECT count(*)
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (_id IN (
    SELECT al._id
    FROM
    (
        SELECT
            unwhe._id,
            arrayJoin(`f_aq2_20f9_t_labels$textual`) AS oo,
            `f_aq2_20f9_t_labels$textual` AS af
        FROM stream_617fc79a355b7f54684a9c8b_node AS unwhe
    ) AS al
    WHERE ((oo[1]) = '场景和受众') AND (_id = al._id) AND hasAll(arrayFlatten(af), ['场景和受众'])
))

Query id: 7869d92a-b699-4aee-90ae-df1d38676cef

┌─count()─┐
│      41 │
└─────────┘

1 rows in set. Elapsed: 0.100 sec. Processed 2.69 million rows, 78.37 MB (26.89 million rows/s., 782.14 MB/s.)

Add Table Index

SELECT count(*)
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id IN (
    SELECT al._id
    FROM
    (
        SELECT
            unwhe._id,
            arrayJoin(`f_7d1_ff35_t_labels$textual`) AS oo,
            `f_7d1_ff35_t_labels$textual` AS af
        FROM stream_616d2986bd17682e66aedbb3_cluster_index AS unwhe
    ) AS al
    WHERE ((oo[1]) = '消费洞察话题') AND (_id = al._id) AND hasAll(arrayFlatten(af), ['消费洞察话题'])
))

Query id: 5d1ef23c-db6a-436e-a56e-9284abbed19c

┌─count()─┐
│   19314 │
└─────────┘

1 rows in set. Elapsed: 0.335 sec. Processed 899.02 thousand rows, 36.81 MB (2.68 million rows/s., 109.76 MB/s.)
SELECT count(*)
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0
WHERE hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物']) AND (_id IN (
    SELECT al._id
    FROM
    (
        SELECT
            unwhe._id,
            arrayJoin(`f_aq2_20f9_t_labels$textual`) AS oo,
            `f_aq2_20f9_t_labels$textual` AS af
        FROM stream_617fc79a355b7f54684a9c8b_node_index AS unwhe
    ) AS al
    WHERE ((oo[1]) = '场景和受众') AND (_id = al._id) AND hasAll(arrayFlatten(af), ['场景和受众'])
))

Query id: 448f0888-233f-4487-a407-efd44ca9666e

┌─count()─┐
│      41 │
└─────────┘

1 rows in set. Elapsed: 0.102 sec. Processed 2.69 million rows, 75.55 MB (26.39 million rows/s., 740.16 MB/s.)

Default (optimize query)

SELECT count(*)
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id IN (
    SELECT _id
    FROM stream_616d2986bd17682e66aedbb3_cluster
    WHERE ((arrayJoin(`f_7d1_ff35_t_labels$textual`)[1]) = '消费洞察话题') AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题'])
))

Query id: 384305f9-f050-48e6-90b4-c70191be4bcc

┌─count()─┐
│   19314 │
└─────────┘

1 rows in set. Elapsed: 0.110 sec. Processed 899.03 thousand rows, 36.81 MB (8.18 million rows/s., 334.82 MB/s.)

ch1 :) SELECT count(*) FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0 WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id in (select _id from  stream_616d2986bd17682e66aedbb3_cluster where arrayJoin(`f_7d1_ff35_t_labels$textual`)[1]= '消费洞察话题' AND hasAll(arrayFlatten(f_7d1_ff35_t_labels$textual), ['消费洞察话题'])))

SELECT count(*)
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖']) AND (_id IN (
    SELECT _id
    FROM stream_616d2986bd17682e66aedbb3_cluster
    WHERE ((arrayJoin(`f_7d1_ff35_t_labels$textual`)[1]) = '消费洞察话题') AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题'])
))

Query id: 832de13b-21bf-4730-af9a-159ba058b186

┌─count()─┐
│   19314 │
└─────────┘

1 rows in set. Elapsed: 0.120 sec. Processed 899.03 thousand rows, 36.81 MB (7.48 million rows/s., 306.30 MB/s.)

Taxonomy node ngrams

stream_616d2986bd17682e66aedbb3ClickHouse Cluster (/ms)PGPool (/ms)
Default453
Add Index4173689.966
Add Index + View1621.117
Default(optimize query)53
stream_617fc79a355b7f54684a9c8bClickHouse Cluster (/ms)PGPool (/ms)
Default245
Add Index241
Add Index + View

clickhouse cluster

default

SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
    FROM stream_616d2986bd17682e66aedbb3_cluster AS tab0
) AS tab1
WHERE ((tab0._id IN (
    SELECT _id
    FROM
    (
        SELECT
            unwhe._id,
            `f_7d1_ff35_t_labels$textual`,
            arrayJoin(`f_7d1_ff35_t_labels$textual`) AS oo
        FROM stream_616d2986bd17682e66aedbb3_cluster AS unwhe
    ) AS al
    WHERE ((oo[1]) = '消费洞察话题') AND (_id = al._id) AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题'])
)) AND hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖'])) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: bdfbbb01-9ec9-450d-bf43-ae734135aca6

┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性']29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状']4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小']2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性']2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好']1221 │
└──────────────────────────────────────────────────────────┴───────┘

125 rows in set. Elapsed: 0.453 sec. Processed 1.80 million rows, 205.28 MB (3.97 million rows/s., 452.99 MB/s.)
SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(`f_aq2_20f9_t_labels$textual`) AS col0
    FROM stream_617fc79a355b7f54684a9c8b_node AS tab0
) AS tab1
WHERE ((tab0._id IN (
    SELECT _id
    FROM
    (
        SELECT
            unwhe._id,
            `f_aq2_20f9_t_labels$textual`,
            arrayJoin(`f_aq2_20f9_t_labels$textual`) AS oo
        FROM stream_617fc79a355b7f54684a9c8b_node AS unwhe
    ) AS al
    WHERE ((oo[1]) = '场景和受众') AND (_id = al._id) AND hasAll(arrayFlatten(`f_aq2_20f9_t_labels$textual`), ['场景和受众'])
)) AND hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物'])) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: af92eb8c-6231-43b4-846b-1df8588b0bef

┌─diffcol0─────────────────────────┬─count─┐
│ ['场景和受众','使用场景','早餐']2 │
│ ['场景和受众','使用场景','送礼']2 │
│ ['场景和受众','受众人群','女性']2 │
│ ['场景和受众','受众人群','孩子']32 │
│ ['场景和受众','受众人群','老人']3 │
└──────────────────────────────────┴───────┘

5 rows in set. Elapsed: 0.245 sec. Processed 5.39 million rows, 161.96 MB (22.00 million rows/s., 661.20 MB/s.)

Add Table Index

SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed_index AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(`f_7d1_ff35_t_labels$textual`) AS col0
    FROM stream_616d2986bd17682e66aedbb3_cluster_index AS tab0
) AS tab1
WHERE ((tab0._id IN (
    SELECT _id
    FROM
    (
        SELECT
            unwhe._id,
            `f_7d1_ff35_t_labels$textual`,
            arrayJoin(`f_7d1_ff35_t_labels$textual`) AS oo
        FROM stream_616d2986bd17682e66aedbb3_cluster_index AS unwhe
    ) AS al
    WHERE ((oo[1]) = '消费洞察话题') AND (_id = al._id) AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题'])
)) AND hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖'])) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 3f4e58b3-5459-4068-8528-ab6dc73b2a75

┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性']29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状']4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小']2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性']2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好']1221 │
└──────────────────────────────────────────────────────────┴───────┘

125 rows in set. Elapsed: 0.417 sec. Processed 1.80 million rows, 205.28 MB (4.31 million rows/s., 492.56 MB/s.)
SELECT
    col0 AS diffcol0,
    count('*') AS count
FROM stream_617fc79a355b7f54684a9c8b_cluster_index AS tab0,
(
    SELECT
        tab0._id,
        arrayJoin(`f_aq2_20f9_t_labels$textual`) AS col0
    FROM stream_617fc79a355b7f54684a9c8b_node_index AS tab0
) AS tab1
WHERE ((tab0._id IN (
    SELECT _id
    FROM
    (
        SELECT
            unwhe._id,
            `f_aq2_20f9_t_labels$textual`,
            arrayJoin(`f_aq2_20f9_t_labels$textual`) AS oo
        FROM stream_617fc79a355b7f54684a9c8b_node_index AS unwhe
    ) AS al
    WHERE ((oo[1]) = '场景和受众') AND (_id = al._id) AND hasAll(arrayFlatten(`f_aq2_20f9_t_labels$textual`), ['场景和受众'])
)) AND hasAny(`f_fq2_7de8_ngrams$textual`, ['价格_实惠', '很满意_购物'])) AND (tab1._id = tab0._id)
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 2dd710bc-15ef-4479-837d-5da7fe2be7d7

┌─diffcol0─────────────────────────┬─count─┐
│ ['场景和受众','使用场景','早餐']2 │
│ ['场景和受众','使用场景','送礼']2 │
│ ['场景和受众','受众人群','女性']2 │
│ ['场景和受众','受众人群','孩子']32 │
│ ['场景和受众','受众人群','老人']3 │
└──────────────────────────────────┴───────┘

5 rows in set. Elapsed: 0.241 sec. Processed 5.39 million rows, 159.14 MB (22.34 million rows/s., 659.91 MB/s.)

Default (optimize query)

SELECT
    arrayJoin(`f_7d1_ff35_t_labels$textual`) AS diffcol0,
    count('*') AS count
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
WHERE ((diffcol0[1]) = '消费洞察话题') AND hasAll(arrayFlatten(`f_7d1_ff35_t_labels$textual`), ['消费洞察话题']) AND hasAny(`f_n61_af35_ngrams$textual`, ['日期_新鲜', '值得_信赖'])
GROUP BY diffcol0
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 90c63d4d-cc58-436b-8d31-b5445086b367

┌─diffcol0─────────────────────────────────────────────────┬─count─┐
│ ['消费洞察话题','产品体验','产品包装','产品完整性']29 │
│ ['消费洞察话题','产品体验','产品包装','产品形状']4 │
│ ['消费洞察话题','产品体验','产品包装','分量大小']2212 │
│ ['消费洞察话题','产品体验','产品包装','包装易撕性']2 │
│ ['消费洞察话题','产品体验','产品包装','包装是否完好']1221 │
└──────────────────────────────────────────────────────────┴───────┘

125 rows in set. Elapsed: 0.053 sec. Processed 899.03 thousand rows, 198.09 MB (16.92 million rows/s., 3.73 GB/s.)

Taxonomy score default

stream_616d2986bd17682e66aedbb3ClickHouse Cluster (/ms)PGPool (/ms)
Default (optimize query)113
Add Index (optimize query)12528073.313 !!!

clickhouse cluster

Default(optimize query)

SELECT
    arrayJoin(`f_7d1_ff35_t_labels$textual`) AS diffcol0,
    arrayJoin(`f_8d1_ff35_t_scores$numerical`) AS diffcolnest1,
    count('*') AS col2
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
GROUP BY
    diffcol0,
    diffcolnest1
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 276530b7-0988-4a15-8e2d-95fa105fe2ef

┌─diffcol0──────────────────────────────────────────────┬─diffcolnest1─┬───col2─┐
│ ['']                                                  │         ᴺᵁᴸᴸ │ 189159 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性']-3465 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性']3404 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性']1943 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性']2808 │
└───────────────────────────────────────────────────────┴──────────────┴────────┘

200 rows in set. Elapsed: 0.113 sec. Processed 899.03 thousand rows, 181.12 MB (7.96 million rows/s., 1.60 GB/s.)

Add Table Index

SELECT
    arrayJoin(`f_7d1_ff35_t_labels$textual`) AS diffcol0,
    arrayJoin(`f_8d1_ff35_t_scores$numerical`) AS diffcolnest1,
    count('*') AS col2
FROM stream_616d2986bd17682e66aedbb3_distributed AS tab0
GROUP BY
    diffcol0,
    diffcolnest1
ORDER BY diffcol0 ASC NULLS FIRST
LIMIT 0, 200

Query id: 276530b7-0988-4a15-8e2d-95fa105fe2ef

┌─diffcol0──────────────────────────────────────────────┬─diffcolnest1─┬───col2─┐
│ ['']                                                  │         ᴺᵁᴸᴸ │ 189159 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性']-3465 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性']3404 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性']1943 │
│ ['消费洞察话题','产品体验','产品包装','产品完整性']2808 │
└───────────────────────────────────────────────────────┴──────────────┴────────┘

200 rows in set. Elapsed: 0.113 sec. Processed 899.03 thousand rows, 181.12 MB (7.96 million rows/s., 1.60 GB/s.)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值