一、前言
今天有一个批量写入多张集群表数据的任务,要求集群每个节点都要写入一定量的数据。于是我多花了几个小时,写了一份灵活的批量写入代码,以后再有类似任务可以直接拿来用。经测试功能是没问题的,但细节可能写得不够好,有空再改。
主要解决问题:Clickhouse批量写入多个节点数据。
二、开始
1、首先是Clickhouse的数据源初始化。正常情况下会考虑使用BalancedClickhouseDataSource,但我希望由代码来控制各节点写入数据量的均匀分布,所以选择了ClickHouseDataSource。
/**
 * Lazily builds one {@code ClickHouseDataSource} per configured cluster node.
 *
 * <p>{@code BalancedClickhouseDataSource} is deliberately not used: keeping a
 * separate DataSource per node lets the caller decide exactly how much data is
 * written to each node.
 */
public class ClickhouseDataSource {

    // Published only once, fully built, via a single volatile write
    // (safe double-checked locking).
    private volatile static List<DataSource> sources = null;

    /**
     * Returns one DataSource per node listed under {@code store.clickhouse.connect}.
     * Thread-safe; the list is created at most once and then cached.
     */
    public static List<DataSource> getDataSource() {
        if (sources == null) {
            synchronized (ClickhouseDataSource.class) {
                if (sources == null) {
                    List<Map<String, Object>> nodes = ConfigUtils.getConfig().getList("store.clickhouse.connect").stream()
                            .map(configValue -> (Map<String, Object>) configValue.unwrapped())
                            .collect(Collectors.toList());
                    List<String> urlList = nodes.stream()
                            .map(node -> (String) node.get("connection-url"))
                            .collect(Collectors.toList());
                    Map<String, Object> param = (Map<String, Object>) ConfigUtils.getConfig().getValue("store.clickhouse.param").unwrapped();
                    ClickHouseProperties ckProperties = new ClickHouseProperties();
                    ckProperties.setMaxBlockSize(80000 * 1000);
                    ckProperties.setMaxMemoryUsage(300000000000L);
                    ckProperties.setMaxTotal(1000);
                    // Force the client-side time zone from config instead of the server's.
                    ckProperties.setUseServerTimeZone(false);
                    ckProperties.setUseServerTimeZoneForDates(false);
                    ckProperties.setUseTimeZone((String) param.get("zone"));
                    ckProperties.setDefaultMaxPerRoute(500);
                    // Very generous timeouts: bulk inserts can run for a long time.
                    ckProperties.setConnectionTimeout(1500 * 1000);
                    ckProperties.setKeepAliveTimeout(-1);
                    ckProperties.setSocketTimeout(Integer.MAX_VALUE);
                    // Credentials can be supplied from the same param map when the
                    // cluster requires authentication:
                    // ckProperties.setUser((String) param.get("user"));
                    // ckProperties.setPassword((String) param.get("password"));

                    // Build the complete list locally and publish it with ONE
                    // volatile write. The previous code assigned an empty list to
                    // `sources` first and then added to it, so a concurrent caller
                    // could pass the null check and return a partially filled list.
                    List<DataSource> built = Lists.newArrayList();
                    for (String url : urlList) {
                        built.add(new ClickHouseDataSource(url, ckProperties));
                    }
                    sources = built;
                }
            }
        }
        return sources;
    }
}
2、ck 批量写入。将数据转成TSV格式后写入。ClickhouseDao中通过dbName和tableName查询system.columns来获得各列的名称和类型;知道了列的类型,即可根据类型来模拟数据。
/**
 * Self-contained HTTP entity that writes a batch of pre-formatted rows
 * (e.g. TSV lines) to the request body one by one, avoiding a single huge
 * concatenated string.
 */
public class BatchStringHttpEntity extends AbstractHttpEntity {

    /** ClickHouse TSV representation of SQL NULL. */
    public static final String NULL = "\\N";

    private final List<String> batchRows;

    public BatchStringHttpEntity(List<String> batchRows) {
        this.batchRows = batchRows;
    }

    /** Not streaming: the content is held in memory and can be re-sent. */
    @Override
    public boolean isStreaming() {
        return false;
    }

    /** Repeatable, so HttpClient may safely retry the request. */
    @Override
    public boolean isRepeatable() {
        return true;
    }

    /** Length is not known up front; -1 lets the client use chunked encoding. */
    @Override
    public long getContentLength() {
        return -1;
    }

    /**
     * Returns the full body as an InputStream. A repeatable, non-streaming
     * (self-contained) entity is expected to support this; the previous
     * implementation threw UnsupportedOperationException, which breaks any
     * HttpClient component (retry handler, interceptor, wire logging) that
     * reads the body through getContent() instead of writeTo().
     */
    @Override
    public InputStream getContent() throws IOException, IllegalStateException {
        java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
        writeTo(buffer);
        return new java.io.ByteArrayInputStream(buffer.toByteArray());
    }

    /**
     * Writes every row as UTF-8 bytes. No separator is inserted between rows,
     * so each row must already carry its own trailing newline.
     */
    @Override
    public void writeTo(OutputStream outputStream) throws IOException {
        for (String row : batchRows) {
            outputStream.write(row.getBytes(StreamUtils.UTF_8));
        }
    }
}
@Slf4j
public class ClickhouseDao {
private DataSource dataSource;
public ClickhouseDao(DataSource dataSourc