clickhouse

1、创建表

-- Per-query fact table created on every node of the cluster.
-- Fix: the legacy MergeTree(date, key) constructor syntax used here was
-- incomplete (the old form also required an index-granularity argument)
-- and is deprecated; modern ClickHouse uses explicit PARTITION BY /
-- ORDER BY clauses instead.
CREATE TABLE queryDatatest ON CLUSTER default_cluster
(
  `query` String,
  `partition_stat_date` Date
)
ENGINE = MergeTree
PARTITION BY partition_stat_date
ORDER BY query;

-- Cluster-wide Distributed table fronting the local queryIpData shards.
-- Fix: Distributed(cluster, database, table[, sharding_key]) — the original
-- call omitted the database argument, so `queryIpData` was parsed as the
-- database name and rand() as the table name.
-- NOTE(review): currentDatabase() resolves to the database the local table
-- lives in at creation time — confirm the local queryIpData table's database
-- and hard-code it if it differs.
CREATE TABLE hm.dis_queryIpData ON CLUSTER default_cluster
AS queryIpData
ENGINE = Distributed(default_cluster, currentDatabase(), queryIpData, rand());

2、删数据

(1)删表

-- Drop the table on every node; IF EXISTS makes the statement idempotent
-- (safe to re-run), and keywords are uppercased for consistency.
DROP TABLE IF EXISTS queryData ON CLUSTER default_cluster;

(2)删账期

-- Delete one accounting period's rows (runs as an asynchronous mutation).
-- Fix: the closing quote was a typographic ’ (U+2019), which is a syntax
-- error; replaced with a plain single quote.
-- NOTE(review): since the table is partitioned by this column, ALTER TABLE
-- ... DROP PARTITION '2023-02-18' would remove the same data instantly and
-- far more cheaply than a DELETE mutation.
ALTER TABLE queryData ON CLUSTER default_cluster DELETE WHERE partition_stat_date = '2023-02-18';

(3)删字段

-- Drop the `ip` column cluster-wide. Fixes: added the missing terminating
-- semicolon, uppercased keywords to match the rest of the file, and added
-- IF EXISTS so a re-run does not fail once the column is gone.
ALTER TABLE Data ON CLUSTER default_cluster DROP COLUMN IF EXISTS `ip`;

(4)清空表

-- Remove every row from ipDatatest on all nodes of the cluster.
TRUNCATE TABLE ipDatatest ON CLUSTER default_cluster;

3、查数据

# Export up to 50 000 rows for one day to CSV through the native client.
# -h ip / --password 密码 / --database="table" are placeholders — substitute
# the real host, password, and database name; -m enables multiline queries.
# NOTE(review): in SQL, DISTINCT applies to the whole selected row, so
# "distinct(query),query_num" deduplicates (query, query_num) PAIRS, not
# query alone — confirm that is the intended behavior.
clickhouse-client -h ip -m -u admin --password 密码 --database="table" --query="select distinct(query),query_num from queryData where partition_stat_date = '2021-06-06' order by query_num desc limit 50000 FORMAT CSV" > test.csv

4、Python调用

# JDBC connection string for ClickHouse; "ip", "port" and "db" are
# placeholders to replace with real connection details.
CLICKHOUSE_URL_NEW = "jdbc:clickhouse://ip:port/db"
# JDBC driver class name. NOTE(review): this is the legacy ru.yandex driver;
# newer ClickHouse JDBC releases ship com.clickhouse.jdbc.ClickHouseDriver —
# confirm which jar is on the Spark classpath.
CLICKHOUSE_DRIVER_NEW = 'ru.yandex.clickhouse.ClickHouseDriver'


# Build the SparkSession used to read from Hive and write to ClickHouse.
# Fixes: the original began with "set spark = ..." which is a Python syntax
# error; the chained calls now use parentheses instead of backslash
# continuations; and the "autoCalculateInitialPartitionNum" config key had a
# trailing space, so that setting was silently ignored by Spark.
# NOTE(review): "self.spark = spark" implies this snippet was pasted out of
# a class method (e.g. __init__) — it is not valid at module scope; restore
# it to its enclosing method when reassembling the job.
spark = (
    SparkSession.builder
    .appName("clickhouse_data")
    .enableHiveSupport()
    .config("spark.master", 'yarn')
    .config("spark.debug.maxToStringFields", "100")
    .config("spark.sql.adaptive.enabled", "true")
    .config("spark.sql.adaptive.allowAdditionalShuffle", "true")
    # fixed: key previously ended with a stray space
    .config("spark.sql.adaptive.autoCalculateInitialPartitionNum", "true")
    # target ~128 MB per post-shuffle partition
    .config("spark.sql.adaptive.shuffle.targetPostShuffleInputSize", 128000000)
    .getOrCreate()
)
self.spark = spark


# JDBC write options for ClickHouse: large batches pushed through a single
# partition (one connection), with a generous socket timeout for big inserts.
clickhouse_prop_new = {
    "driver": CLICKHOUSE_DRIVER_NEW,
    "socket_timeout": "300000",
    "rewriteBatchedStatements": "true",
    "batchsize": "1000000",
    "numPartitions": "1",
    "user": "admin",
    "password": "***",
}
self.clickhouse_prop_new = clickhouse_prop_new


# Pull one day's rows from Hive and append them to ClickHouse over JDBC.
# Fix: the original literal '''select ... = '{}''' terminated at the quote
# that was meant to close the date value, so the generated SQL contained an
# unterminated string; a plain double-quoted literal keeps the closing
# single quote inside the query text.
selectSqlWise = "select 字段1 from test where stat_date = '{}'".format(datestr)
result = self.spark.sql(selectSqlWise)
result.show()
result.write.jdbc(url=CLICKHOUSE_URL_NEW, table='Data', mode='append',
                  properties=self.clickhouse_prop_new)

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值