Clickhouse 字典表使用场景

一.字典创建和查询

1.创建表和数据:

drop table t_region;
create table t_region(region_id UInt64, parent_region UInt64, region_name String) ENGINE=TinyLog;
insert into t_region values
(1, 0, 'jiangsu'),(2, 1, 'suzhou'),(3, 2, 'huqiu'),(4, 0, 'anhui'),(5, 4, 'hefei');


创建字典, 指定HIERARCHICAL字段:
DROP DICTIONARY t_dict_region;
CREATE DICTIONARY t_dict_region (
    region_id UInt64,
    parent_region UInt64  HIERARCHICAL,
    region_name String 
)
PRIMARY KEY region_id
SOURCE(CLICKHOUSE(
    host 'localhost'
    port 9001
    user 'default'
    db 'default'
    password ''
    table 't_region'
))
LAYOUT(HASHED())
LIFETIME(30);

2.字典的查询

SELECT dictGetString('default.t_dict_region', 'region_name', toUInt64(2)) AS regionName;

┌─regionName─┐
│ suzhou     │
└────────────┘
SELECT dictGetHierarchy('default.t_dict_region', toUInt64(3));

┌─dictGetHierarchy('default.t_dict_region', toUInt64(3))─┐
│ [3,2,1]                                                │
└────────────────────────────────────────────────────────┘


3.字典数据源之mysql表

  1. 在mysql数据库创建表并插入数据:
drop table test.test_dc;
create table test.test_dc(
  id bigint,
  name varchar(100),
  age int,
  PRIMARY KEY (id)
);

insert into test.test_dc values(1, 'flink', 4);
insert into test.test_dc values(2, 'spark', 6);
insert into test.test_dc values(3, 'clickhouse', 5);

查看MySQL数据:
mysql> select * from test.test_dc;
+----+------------+------+
| id | name       | age  |
+----+------------+------+
|  1 | flink      |    4 |
|  2 | spark      |    6 |
|  3 | clickhouse |    5 |
+----+------------+------+

  1. 在ClickHouse创建字典:
DROP DICTIONARY mydicmysql;
CREATE DICTIONARY mydicmysql
(
    id UInt64,
    name String,
    age UInt8
)
PRIMARY KEY id
SOURCE(MYSQL(
    port 3306
    user 'root'
    password '123456'
    replica(host '127.0.0.1' priority 1)
    replica(host '127.0.0.1' priority 1)
    db 'test'
    table 'test_dc'
    invalidate_query 'select age from test.test_dc where id=3'
))
LAYOUT(FLAT())
LIFETIME(MIN 3 MAX 5);
select dictGet('default.mydicmysql', 'name', toUInt64(2)) as name;

更改mysql的age字段生效。

4.字典的数据源之文件数据源

TabSeparated格式
文件示例:

准备测试数据
文件命名为person.tsv,存放在目录:/var/lib/clickhouse/user_files,字段之间使用制表符分隔,即格式为TabSeparated。数据如下:
1	'id001'	'xiaohe'	23
2	'id002'	'xiaoxue'	25
3	'id003'	'xiaoyu'	26
4	'id004'	'xiaoxi'	27


创建字典:
DROP DICTIONARY t_dict_person_ddl;
CREATE DICTIONARY t_dict_person_ddl
(
    id UInt64,
    code String,
    name String,
    age UInt8
)
PRIMARY KEY id
SOURCE(FILE(path '/var/lib/clickhouse/user_files/person.tsv' format 'TabSeparated'))
LAYOUT(FLAT())
LIFETIME(30);


SELECT dictGetString('default.t_dict_person_ddl', 'name', toUInt64(2)) AS regionName;

当然,字典类型的数据也可以通过配置实现

<yandex> 
  <dictionary> 
    <name>t_dict_executable</name>  

    <structure> 
      <id> 
        <name>id</name> 
      </id>  
      <attribute> 
        <name>code</name>  
        <type>String</type>  
        <null_value/> 
      </attribute>  
      <attribute> 
        <name>name</name>  
        <type>String</type>  
        <null_value/> 
      </attribute>  
      <attribute> 
        <name>age</name>  
        <type>UInt8</type>  
        <null_value/> 
      </attribute> 
    </structure>  
    <source> 
      <executable>
        <command>cat /var/lib/clickhouse/user_files/person.tsv</command>
        <format>TabSeparated</format>
      </executable>
    </source>  
    <layout> 
      <hashed/> 
    </layout>  
    <lifetime>10</lifetime> 
  </dictionary> 
</yandex>

二.字典的存储方式

以下测试均在default数据库。

1. flat/hash/sparse_hash/cache

DROP DICTIONARY t_dict_person_ddl;
CREATE DICTIONARY t_dict_person_ddl
(
    id UInt64,
    code String,
    name String,
    age UInt8
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(
    host 'localhost'
    port 9001
    user 'default'
    db 'default'
    password ''
    table 't_dic_ch'
    where 'id>0'
))
LAYOUT(CACHE(SIZE_IN_CELLS 10000))
LIFETIME(30);

SELECT dictGetString('default.t_dict_person_ddl', 'name', toUInt64(2)) AS regionName;


FLAT()、HASHED()、SPARSE_HASHED()、CACHE(SIZE_IN_CELLS 10000)

2. complex_key_hashed/complex_key_cache

DROP DICTIONARY t_dict_person_ddl;
CREATE DICTIONARY t_dict_person_ddl
(
    id UInt64,
    code String,
    name String,
    age UInt8
)
PRIMARY KEY id,code
SOURCE(CLICKHOUSE(
    host 'localhost'
    port 9001
    user 'default'
    db 'default'
    password ''
    table 't_dic_ch'
    where 'id>0'
))
LAYOUT(COMPLEX_KEY_HASHED())
LIFETIME(30);

SELECT dictGet('default.t_dict_person_ddl', 'name', tuple(toUInt64(2), 'id002')) AS name;

COMPLEX_KEY_HASHED()、COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10000)

3. range_hashed

drop table t_hash_range;
create table t_hash_range(id UInt64, start Date, end Date, amount Float32) ENGINE=TinyLog;
insert into t_hash_range values
(123, '2020-03-20', '2020-03-22', 0.15)
(123, '2020-03-23', '2020-03-27', 0.25)
(456, '2020-04-20', '2020-04-30', 0.35)
;

查看数据:
SELECT * FROM t_hash_range;

┌──id─┬──────start─┬────────end─┬─amount─┐
│ 1232020-03-202020-03-220.15 │
│ 1232020-03-232020-03-270.25 │
│ 4562020-04-202020-04-300.35 │
└─────┴────────────┴────────────┴────────┘

创建字典:
DROP DICTIONARY t_dict_hash_range;
CREATE DICTIONARY t_dict_hash_range (
    id UInt64,
    start Date,
    end Date,
    amount Float32
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(
    host 'localhost'
    port 9001
    user 'default'
    db 'default'
    password ''
    table 't_hash_range'
))
LAYOUT(RANGE_HASHED())
RANGE(MIN start MAX end)
LIFETIME(30);

查看id为123的记录,在日期2020-03-21日的amount:
select dictGetFloat32('default.t_dict_hash_range', 'amount', toUInt64(123), toDate('2020-03-21')) as amount;


查看id为123的记录,在日期2020-03-25日的amount:
select dictGetFloat32('default.t_dict_hash_range', 'amount', toUInt64(123), toDate('2020-03-25')) as amount;

日期之外的记录:
SELECT dictGetFloat32('default.t_dict_hash_range', 'amount', toUInt64(123), toDate('2020-03-29')) AS amount;

4. ip_tire

创建表和测试数据:
drop table t_ip_tire;
create table t_ip_tire(prefix String, asn UInt32, ccode String) ENGINE=TinyLog;
insert into t_ip_tire values
('202.79.32.0/20', 17501, 'NP')
('2620:0:870::/48', 3856, 'US')
('2a02:6b8:1::/48', 13238, 'RU')
('2001:db8::/32', 65536, 'ZZ')
;

查看数据:
SELECT * FROM t_ip_tire;

┌─prefix──────────┬───asn─┬─ccode─┐
│ 202.79.32.0/2017501 │ NP    │
│ 2620:0:870::/483856 │ US    │
│ 2a02:6b8:1::/4813238 │ RU    │
│ 2001:db8::/3265536 │ ZZ    │
└─────────────────┴───────┴───────┘


创建字典:
DROP DICTIONARY t_dict_ip_tire;
CREATE DICTIONARY t_dict_ip_tire (
    prefix String,
    asn UInt32,
    ccode String
)
PRIMARY KEY prefix
SOURCE(CLICKHOUSE(
    host 'localhost'
    port 9001
    user 'default'
    db 'default'
    password ''
    table 't_ip_tire'
))
LAYOUT(IP_TRIE())
LIFETIME(30);

检索数据:
select
dictGetUInt32('default.t_dict_ip_tire', 'asn', tuple(IPv4StringToNum('202.79.32.22'))) as asn,
dictGetString('default.t_dict_ip_tire', 'ccode', tuple(IPv4StringToNum('202.79.32.22'))) as ccode
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值