1.准备数据
1.1 clickhouse建表并插入数据
CREATE TABLE cell_towers_10
(
radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5),
mcc UInt16,
net UInt16,
area UInt16,
cell UInt64,
unit Int16,
lon Float64,
lat Float64,
range UInt32,
samples UInt32,
changeable UInt8,
created DateTime,
updated DateTime,
averageSignal UInt8
)
ENGINE = MergeTree ORDER BY (radio, mcc, net, created);
INSERT INTO datasets.cell_towers_10 (radio,mcc,net,area,cell,unit,lon,lat,`range`,samples,changeable,created,updated,averageSignal) VALUES
('CDMA',302,86,130,4113,-1,-112.069237,48.978268,1000,1,1,'2017-09-15 10:09:44','2017-09-15 10:10:44',0),
('CDMA',302,86,130,0,-1,-112.069237,48.978268,1000,1,1,'2017-09-15 10:09:45','2017-09-15 10:10:45',0),
('CDMA',302,86,130,4114,-1,-112.069237,48.978268,1000,1,1,'2017-09-15 10:09:46','2017-09-15 10:10:46',0),
('CDMA',302,1168,15002,59995,-1,-79.462952,44.009564,1000,7,1,'2017-09-14 19:22:48','2017-09-14 19:30:04',0),
('CDMA',302,1168,15002,59506,-1,-79.522812,43.79319,1000,1,1,'2017-09-14 19:57:04','2017-09-14 20:33:33',0),
('CDMA',302,1168,15004,60815,-1,-79.315284,43.838686,1000,7,1,'2017-09-14 20:22:45','2017-09-14 21:06:28',0),
('CDMA',302,1168,15002,59507,-1,-79.459198,43.797741,1000,3,1,'2017-09-14 20:38:37','2017-09-14 21:20:47',0),
('CDMA',302,1168,15002,59946,-1,-79.462547,44.01469,1000,1,1,'2017-09-14 22:19:45','2017-09-14 22:56:46',0),
('CDMA',302,1168,16000,14113,-1,-80.480919,43.435841,1000,1,1,'2017-09-14 22:53:59','2017-09-15 00:22:24',0),
('CDMA',302,1168,15004,60516,-1,-79.37619,43.84483,1000,2,1,'2017-09-14 23:11:07','2017-09-15 00:57:57',0);
1.2 hive中建表
CREATE TABLE ck_cell_towers_10
(
radio string,
mcc smallint,
net smallint,
area int,
cell bigint,
unit smallint,
lon double,
lat double,
range_a int,
samples int,
changeable tinyint,
created date,
updated date,
averageSignal tinyint
)row format delimited fields terminated by ",";
2. 准备工作
由于Datax没有clickhousereader组件,用rdbmsreader替代。
需要把clickhousewriter/libs下的所有jar包复制到rdbmsreader/libs下,同名jar包直接替换,另外,删掉rm -f guava-r05.jar这个包,否则会报错。
修改plugin.json文件:在"driver" 增加 "ru.yandex.clickhouse.ClickHouseDriver"。
编辑json文件时,name改为rdbmsreader。
"name": "rdbmsreader"
3. 创建任务
可以在datax-web中创建任务生成json,也可以直接编辑json
{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "rdbmsreader",
"parameter": {
"username": "yRjwDFuoPKlqya9h9H2Amg==",
"password": "yRjwDFuoPKlqya9h9H2Amg==",
"column": [
"radio",
"mcc",
"net",
"area",
"cell",
"unit",
"lon",
"lat",
"range",
"samples",
"changeable",
"created",
"updated",
"averageSignal"
],
"splitPk": "",
"connection": [
{
"table": [
"cell_towers"
],
"jdbcUrl": [
"jdbc:clickhouse://10.16.60.44:8123/datasets"
]
}
]
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"defaultFS": "hdfs://10.16.60.31:8020",
"fileType": "text",
"path": "/user/hive/warehouse/datasets.db/ck_cell_towers",
"fileName": "ck_cell_towers",
"writeMode": "append",
"fieldDelimiter": ",",
"column": [
{
"name": "radio",
"type": "string"
},
{
"name": "mcc",
"type": "smallint"
},
{
"name": "net",
"type": "smallint"
},
{
"name": "area",
"type": "int"
},
{
"name": "cell",
"type": "bigint"
},
{
"name": "unit",
"type": "smallint"
},
{
"name": "lon",
"type": "double"
},
{
"name": "lat",
"type": "double"
},
{
"name": "range_a",
"type": "int"
},
{
"name": "samples",
"type": "int"
},
{
"name": "changeable",
"type": "tinyint"
},
{
"name": "created",
"type": "date"
},
{
"name": "updated",
"type": "date"
},
{
"name": "averagesignal",
"type": "tinyint"
}
]
}
}
}
]
}
}