一 卸载老版本
1 卸载及删除安装文件
# 查看clickhouse的yum源
yum list installed | grep clickhouse
# 移除clickhouse yum
yum remove -y clickhouse-common-static
yum remove -y clickhouse-server-common
# 移除相关目录
rm -rf /var/lib/clickhouse
rm -rf /etc/clickhouse-*
rm -rf /var/log/clickhouse-server
# 查找其他地方还有没用clickhouse,有的话也删除了
find / -name clickhouse-*
# 检验删除完成没
yum list installed | grep clickhouse
find / -name clickhouse-*
二 安装新版本
1 下载仓库
curl -s https://packagecloud.io/install/repositories/altinity/clickhouse/script.rpm.sh | sudo os=centos dist=7 bash
2 查看安装包
sudo yum list 'clickhouse*'
3 安装服务
sudo yum install -y clickhouse-server clickhouse-client
4 查看安装成功的列表
sudo yum list installed 'clickhouse*'
# 控制台输出
clickhouse-client.noarch
clickhouse-common-static.x86_64
clickhouse-server.noarch
5. clickhouse安装目录
/etc/clickhouse-server #clickhouse服务的配置文件目录,包括:config.xml和users.xml
/etc/clickhouse-client clickhouse客户端的配置文件目录,里面只有一个config.xml并且默认为空
/var/lib/clickhouse clickhouse默认数据目录
/var/log/clickhouse-server clickhouse服务端默认日志目录
...其他的稍微没那么重要,就没列出来
6 创建clickhouse用户,并指定数据目录
useradd clickhouse -d /data/clickhouse -c 'clickhouse server' -s /bin/bash
# 说明:其中-d指定clickhouse的数据目录,目录会自动创建并且权限为clickhouse的用户和组,然后
7 创建clickhouse数据目录和日志目录
# 1 创建数据目录
mkdir /data/clickhouse
chown -R clickhouse:clickhouse /data/clickhouse
# 2 创建日志目录
mkdir /var/log/clickhouse-server
chown -R clickhouse:clickhouse /var/log/clickhouse-server
8 修改clickhouse-server的配置文件
vim /etc/clickhouse-server/config.xml
# 找到该配置,将注释去掉
# 该配置的作用:开启后任意ip都可以连接ch,不配置listen_host,会出现Connection refused的情况
<listen_host>::</listen_host>
# 配置数据存放目录,修改为上面我们创建clickhouse用户指定的数据目录
<path>/data/clickhouse</path>
<tmp_path>/data/clickhouse/tmp/</tmp_path> #tmp会自动创建
# 验证:本文档 第三步-导入数据集,导数据成功后可以去该目录下验证,数据是存放在其data目录下,tmp是临时文件,其他的是元数据之类的
vim /etc/clickhouse-server/users.xml
# 找到default用户下的配置
<password>123456</password>
9 启动服务
# 切换用户
su clickhouse
#启动服务
clickhouse-server start
10 查看服务状态
# 查看clickhouse进程
ps -aux |grep clickhouse
# 查看http端口(默认8123)的监听情况
lsof -i :8123
11 登陆客户端
clickhouse-client -u default -h gda1 --password 123456
三 官方数据集测试-航班飞行数据
1 数据准备
- 建表
CREATE TABLE `ontime` (
`Year` UInt16,
`Quarter` UInt8,
`Month` UInt8,
`DayofMonth` UInt8,
`DayOfWeek` UInt8,
`FlightDate` Date,
`UniqueCarrier` FixedString(7),
`AirlineID` Int32,
`Carrier` FixedString(2),
`TailNum` String,
`FlightNum` String,
`OriginAirportID` Int32,
`OriginAirportSeqID` Int32,
`OriginCityMarketID` Int32,
`Origin` FixedString(5),
`OriginCityName` String,
`OriginState` FixedString(2),
`OriginStateFips` String,
`OriginStateName` String,
`OriginWac` Int32,
`DestAirportID` Int32,
`DestAirportSeqID` Int32,
`DestCityMarketID` Int32,
`Dest` FixedString(5),
`DestCityName` String,
`DestState` FixedString(2),
`DestStateFips` String,
`DestStateName` String,
`DestWac` Int32,
`CRSDepTime` Int32,
`DepTime` Int32,
`DepDelay` Int32,
`DepDelayMinutes` Int32,
`DepDel15` Int32,
`DepartureDelayGroups` String,
`DepTimeBlk` String,
`TaxiOut` Int32,
`WheelsOff` Int32,
`WheelsOn` Int32,
`TaxiIn` Int32,
`CRSArrTime` Int32,
`ArrTime` Int32,
`ArrDelay` Int32,
`ArrDelayMinutes` Int32,
`ArrDel15` Int32,
`ArrivalDelayGroups` Int32,
`ArrTimeBlk` String,
`Cancelled` UInt8,
`CancellationCode` FixedString(1),
`Diverted` UInt8,
`CRSElapsedTime` Int32,
`ActualElapsedTime` Int32,
`AirTime` Int32,
`Flights` Int32,
`Distance` Int32,
`DistanceGroup` UInt8,
`CarrierDelay` Int32,
`WeatherDelay` Int32,
`NASDelay` Int32,
`SecurityDelay` Int32,
`LateAircraftDelay` Int32,
`FirstDepTime` String,
`TotalAddGTime` String,
`LongestAddGTime` String,
`DivAirportLandings` String,
`DivReachedDest` String,
`DivActualElapsedTime` String,
`DivArrDelay` String,
`DivDistance` String,
`Div1Airport` String,
`Div1AirportID` Int32,
`Div1AirportSeqID` Int32,
`Div1WheelsOn` String,
`Div1TotalGTime` String,
`Div1LongestGTime` String,
`Div1WheelsOff` String,
`Div1TailNum` String,
`Div2Airport` String,
`Div2AirportID` Int32,
`Div2AirportSeqID` Int32,
`Div2WheelsOn` String,
`Div2TotalGTime` String,
`Div2LongestGTime` String,
`Div2WheelsOff` String,
`Div2TailNum` String,
`Div3Airport` String,
`Div3AirportID` Int32,
`Div3AirportSeqID` Int32,
`Div3WheelsOn` String,
`Div3TotalGTime` String,
`Div3LongestGTime` String,
`Div3WheelsOff` String,
`Div3TailNum` String,
`Div4Airport` String,
`Div4AirportID` Int32,
`Div4AirportSeqID` Int32,
`Div4WheelsOn` String,
`Div4TotalGTime` String,
`Div4LongestGTime` String,
`Div4WheelsOff` String,
`Div4TailNum` String,
`Div5Airport` String,
`Div5AirportID` Int32,
`Div5AirportSeqID` Int32,
`Div5WheelsOn` String,
`Div5TotalGTime` String,
`Div5LongestGTime` String,
`Div5WheelsOff` String,
`Div5TailNum` String
) ENGINE = MergeTree
PARTITION BY Year
ORDER BY (Carrier, FlightDate)
SETTINGS index_granularity = 8192;
- 数据下载脚本
# 1 编辑下载脚本
vim download.sh
#!/bin/sh
for s in `seq 1987 2017`
do
for m in `seq 1 12`
do
wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip
done
done
- 导入脚本
# 2 编辑数据导入脚本
vim load.sh
#!/bin/sh
for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client -u default -h hadoop1 --password 123456 --input_format_skip_unknown_fields=1 --query="INSERT INTO ontime FORMAT CSVWithNames"; done
# 注意-h的主机名换成自己的
# --input_format_skip_unknown_fields=1 是跳过不能识别的字段(数据集中存在多余的字段)
2 测试sql
- 参考:参考文章 https://blog.csdn.net/m0_37739193/article/details/79612186
- 案例1-2017年最受欢迎的目的地:
SELECT \
OriginCityName, \
DestCityName, \
count(*) AS flights, \
bar(flights, 0, 20000, 40) \
FROM ontime \
WHERE Year = 2017 \
GROUP BY \
OriginCityName, \
DestCityName \
ORDER BY flights DESC \
LIMIT 20;
3 看下导入的数据是否是存放在我们配置的目录下面的
cd /data/clickhouse
ls
# 目录结构显示
data dictionaries_lib flags metadata metadata_dropped preprocessed_configs status store tmp
四 代码JDBC连接测试
1 依赖引入
pow文件引入
<dependency>
<groupId>ru.yandex.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<version>0.2.4</version>
</dependency>
2 测试代码
import java.sql.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class ClickHouseJDBCTest {
public static void main(String[] args) {
String sqlDB = "show databases";//查询数据库
String sqlTab = "show tables";//查看表
String sqlCount = "select count(*) count from ontime";//查询ontime数据量
exeSql(sqlDB);
exeSql(sqlTab);
exeSql(sqlCount);
}
public static void exeSql(String sql){
String address = "jdbc:clickhouse://hadoop1:8123/default";
Connection connection = null;
Statement statement = null;
ResultSet results = null;
try {
Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
connection = DriverManager.getConnection(address,"default","123456");
statement = connection.createStatement();
long begin = System.currentTimeMillis();
results = statement.executeQuery(sql);
long end = System.currentTimeMillis();
System.out.println("执行("+sql+")耗时:"+(end-begin)+"ms");
ResultSetMetaData rsmd = results.getMetaData();
List<Map> list = new ArrayList();
while(results.next()){
Map map = new HashMap();
for(int i = 1;i<=rsmd.getColumnCount();i++){
map.put(rsmd.getColumnName(i),results.getString(rsmd.getColumnName(i)));
}
list.add(map);
}
for(Map map : list){
System.err.println(map);
}
} catch (Exception e) {
e.printStackTrace();
}finally {//关闭连接
try {
if(results!=null){
results.close();
}
if(statement!=null){
statement.close();
}
if(connection!=null){
connection.close();
}
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
- 点击执行,输出
INFO 09-25 10:03:19,757 ClickHouseDriver:42 Driver registered
执行(show databases)耗时:3ms
执行(show tables)耗时:3ms
{name=_temporary_and_external_tables}
{name=default}
{name=system}
{name=ontime}
执行(select count(*) count from ontime)耗时:4ms
{count=895844}
五 DBEver连接测试
1 新建连接:选择clickhouse
2 编辑连接
3 测试连接,连接成功