ClickHouse Notes

### Resolving ClickHouse out-of-memory errors
    1. Increase the memory limit.
    2. Reduce the value of max_threads: peak memory usage < (threads at peak) × per-thread memory usage, so fewer threads lower the peak. See the sketch below.
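
    A minimal sketch of tuning both knobs per session (the values below are hypothetical, not recommendations):

    -- Cap per-query memory and reduce parallelism so peak memory stays under the limit.
    SET max_memory_usage = 10000000000;  -- ~10 GB per query
    SET max_threads = 8;                 -- fewer threads, lower peak memory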


### Sync bug
    When syncing TSV data into ClickHouse, empty (NULL) fields require the setting input_format_tsv_empty_as_default=1 so that empty fields are read as column defaults.
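
    For example, a sketch of passing the setting on the client (db.table and data.tsv are placeholder names):

    clickhouse-client --input_format_tsv_empty_as_default=1 \
        --query="INSERT INTO db.table FORMAT TSV" < data.tsv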


### View running queries
    SHOW PROCESSLIST;

### Find and kill queries
    SELECT query_id, initial_user, initial_address, elapsed, query FROM system.processes WHERE initial_user = 'yydata_1';
    KILL QUERY WHERE query_id IN (SELECT query_id FROM system.processes WHERE initial_user = 'default') SYNC;
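
    KILL QUERY also accepts a TEST modifier, which lists the queries that would be killed without actually stopping them:

    KILL QUERY WHERE initial_user = 'default' TEST;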


### Create a replicated table
    CREATE TABLE weck_tmp.test_df ON CLUSTER bigdata_ck_cluster
    (
        `id` Int32 COMMENT 'doctor id',
        `blean` Int32,
        `tint` Int8,
        `smint` Int16
    )
    ENGINE = ReplicatedMergeTree('/clickhouse/tables/weck_tmp/test_df', '{shard}_{replica}')
    ORDER BY (id)
    SETTINGS index_granularity = 8192;
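
    The {shard} and {replica} placeholders are substituted from the macros defined in each server's config; you can inspect them with:

    SELECT * FROM system.macros;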


### Drop a settings profile
    DROP SETTINGS PROFILE max_memory_usage_profile;

### Create a settings profile
    CREATE SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 10240000 MIN 10240000 MAX 10240002, max_columns_to_read = 2 READONLY TO a1, a2;
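
    To verify the resulting definition:

    SHOW CREATE SETTINGS PROFILE max_memory_usage_profile;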

### Alter a settings profile
    ALTER SETTINGS PROFILE IF EXISTS max_memory_usage_profile SETTINGS max_memory_usage = 10240000 MIN 10240000 MAX 10240002, max_columns_to_read = 2 READONLY; -- or WRITABLE instead of READONLY

### Show all settings profiles
    SHOW SETTINGS PROFILES;

### Create a user and assign a profile
    CREATE USER test IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'test' SETTINGS PROFILE max_memory_usage_profile;
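
    To confirm the profile was attached to the user:

    SHOW CREATE USER test;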

### Drop a user
    DROP USER bigdata_sync ON CLUSTER bigdata_ck_cluster;

### Create a user
    CREATE USER bigdata_sync ON CLUSTER bigdata_ck_cluster IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'bigdata_sync@JJN';

### Grant privileges
    GRANT ON CLUSTER bigdata_ck_cluster CREATE TABLE, SELECT, TRUNCATE, INSERT, DELETE, CREATE VIEW, DROP TABLE, DROP VIEW, CREATE DICTIONARY, DROP DICTIONARY, SHOW, ALTER ON bigdata_sync.* TO bigdata_sync;
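
    To review what the user currently holds:

    SHOW GRANTS FOR bigdata_sync;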

### Revoke privileges
    REVOKE ON CLUSTER bigdata_ck_cluster SELECT ON bigdata_sync.* FROM john;
    SET allow_introspection_functions = 1;
    REVOKE ON CLUSTER bigdata_ck_cluster ALL PRIVILEGES ON bigdata_sync.* FROM john;


### Force-drop a large table
    Tables larger than max_table_size_to_drop refuse to drop; creating the force_drop_table flag bypasses the guard:
    sudo touch '/data00/clickhouse/flags/force_drop_table' && sudo chmod 666 '/data00/clickhouse/flags/force_drop_table'
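
    Once the flag exists, the oversized drop goes through (table name borrowed from the example above; the flag is consumed by the drop):

    DROP TABLE weck_tmp.test_df;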

### Create a database
    CREATE DATABASE weck_dim ON CLUSTER bigdata_ck_cluster;

### Revoke table grants from all users
    REVOKE ALL PRIVILEGES ON db.table FROM ALL;


### Installation
    RPM-based (e.g. CentOS)
    wget http://172.16.246.252:81/hadoopteam/clickhouse/ck-20.12/clickhouse-client-20.12.3.3-2.noarch.rpm
    wget http://172.16.246.252:81/hadoopteam/clickhouse/ck-20.12/clickhouse-common-static-20.12.3.3-2.x86_64.rpm
    wget http://172.16.246.252:81/hadoopteam/clickhouse/ck-20.12/clickhouse-common-static-dbg-20.12.3.3-2.x86_64.rpm
    wget http://172.16.246.252:81/hadoopteam/clickhouse/ck-20.12/clickhouse-server-20.12.3.3-2.noarch.rpm
    # list installed ClickHouse packages
    rpm -qa | grep clickhouse
    # uninstall a package
    rpm -e --nodeps xxx
    # install
    rpm -ivh --force *.rpm


    Ubuntu/Debian (APT)
    sudo apt-get install apt-transport-https ca-certificates dirmngr
    sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4

    echo "deb https://repo.clickhouse.tech/deb/stable/ main/" | sudo tee \
        /etc/apt/sources.list.d/clickhouse.list
    sudo apt-get update

    sudo apt-get install -y clickhouse-server clickhouse-client
    sudo service clickhouse-server start
    clickhouse-client
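
    A quick smoke test after either install path:

    clickhouse-client --query "SELECT version()"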


### Export data to a file
    1. Export the rows to a CSV file, filtering on task count (numTasks), shuffle memory, and execution time:
    clickhouse-client -h clickhouse-offline1.bigdata.guahao-office.com -u xxx --password xxx --format_csv_delimiter="|" --database="weck_dw" --query="select replaceAll(description,'\n',' '),max(numTasks),max(dff_time) as dff_time,max(shuffleReadGB) from (select datediff('second',toDateTime(substring(replaceAll(replaceAll(submissionTime,'GMT',''),'T',' '),1,19)),toDateTime(substring(replaceAll(replaceAll(if(completionTime='','1970-01-01 01:01:01',completionTime),'GMT',''),'T',' '),1,19))) as dff_time,description,numTasks,toInt64(shuffleReadBytes)/1024/1024/1024 as shuffleReadGB,submissionTime,completionTime from weck_dw.sparksql_stage_monitor_df where (numTasks>3000 or toInt64(shuffleReadBytes)/1024/1024/1024>10) and etltime>'2021-04-15' and submissionTime!='' and completionTime!='') group by description FORMAT CSV" > numTasks.csv

    2. Download the file and upload it to HDFS (/user/duyf/numTasks.csv).
    3. Create a Hive table:
    create table wedw_tmp.test_df(sql string, numTasks string, executorRunTime string, shuffleReadBytes string) row format delimited fields terminated by '|';

    4. Load the data into the table:
    load data inpath '/user/duyf/numTasks.csv' overwrite into table wedw_tmp.test_df;

    5. Query it:
    select replace(sql,"\"","") as sql, replace(numtasks,"\"","") as numtasks, cast(replace(executorruntime,"\"","") as int) as executorruntime, shufflereadbytes as shufflereadGB from wedw_tmp.test_df where cast(executorruntime as int)>100 limit 5;
    
 
