需要根据maxwell打包生成maxwell的jar包,Release v1.26.2 · zendesk/maxwell · GitHub
1) 项目导入Idea生成maxwell jar文件
2)将生成的jar文件作为依赖库导入项目
3) 配置maven打包设置
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>maxwell_to_clickhouse</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>maxwell_to_clickhouse</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.2.5</version>
</dependency>
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<dependency>
<groupId>osm</groupId>
<artifactId>gt-main</artifactId>
<version>1.7</version>
<scope>system</scope>
<systemPath>${project.basedir}/src/main/resource/maxwell-1.26.2.jar</systemPath>
</dependency>
# 这里配置maxwell打包,在mvn执行package的时候,可以将 maxwell.jar打入最终jar文件当中
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.0.28</version>
</dependency>
# 设置clickhouse的驱动
<dependency>
<groupId>ru.yandex.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<version>0.3.2</version>
</dependency>
# 配置jdbcTemplate,方便写入db
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
<version>5.1.9.RELEASE</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- any other plugins -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>
</project>
4)继承Maxwell抽象类,获取json格式的binlog;并用jdbc的方式写入CH
package org.example;
import com.zendesk.maxwell.MaxwellContext;
import com.zendesk.maxwell.producer.AbstractProducer;
import com.zendesk.maxwell.producer.ProducerFactory;
public class CustomProducerFactory implements ProducerFactory {
public AbstractProducer createProducer(MaxwellContext context)
{
return new CustomProducer(context);
}
}
package org.example;
import com.alibaba.fastjson.JSON;
import com.zendesk.maxwell.MaxwellContext;
import com.zendesk.maxwell.producer.AbstractProducer;
import com.zendesk.maxwell.producer.MaxwellOutputConfig;
import com.zendesk.maxwell.row.RowMap;
import org.bean.Card;
import org.springframework.jdbc.core.JdbcTemplate;
import org.utils.CommonUtil;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.TimerTask;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
public class CustomProducer extends AbstractProducer {
private static MaxwellOutputConfig config=new MaxwellOutputConfig();
private JdbcTemplate ch_template = null;
private long flushTime = 0l;
private static int buffer_switch_tag = 1;
private String insertSql = "insert into `mp_connect`.connect_card_all values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
private List<Object[]> bufferList0_1 = null;
private List<Object[]> bufferList0_2 = null;
public CustomProducer(MaxwellContext context) {
super(context);
config.includesServerId=true; // 配置输出json字段包含serverID
ch_template = CommonUtil.getClickHouseTemplate();
flushTime = System.currentTimeMillis();
// 因为 ArrayList 本身不是线程安全的,通过 Collections.synchronizedList(List<T> list) 可以将其包装成一个线程安全的 List
bufferList0_1 = Collections.synchronizedList(new ArrayList());
bufferList0_2 = Collections.synchronizedList(new ArrayList());
// 开启独立线程每隔5秒批量更新clickhouse
ScheduledExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(2);
TimerTask timerTask = new TimerTask() {
@Override
public void run() {
batchUpdate2Ch();
//System.out.println("------------TimeTask------------>");
}
};
scheduledThreadPool.scheduleAtFixedRate(timerTask, 1, 5, TimeUnit.SECONDS);
}
@Override
public void push(RowMap r)
{
if (!r.shouldOutput(outputConfig)) {
context.setPosition(r.getNextPosition());
return;
}
try {
String mysql_binlog_json = r.toJSON(outputConfig);
System.out.println("--------js-------->"+mysql_binlog_json);
Card card = JSON.parseObject(JSON.parseObject(mysql_binlog_json).getJSONObject("data").toJSONString(), Card.class);
if(JSON.parseObject(mysql_binlog_json).getString("type").equals("delete")){
card.set_sign(CommonUtil.delete_signl);
}
Object[] one = new Object[]{
card.getId(),
card.getIccid(),
card.getApi_id(),
card.getImei(),
card.getImsi(),
card.getSim(),
card.getAccount_id(),
card.getTenant_id(),
card.getCategoy(),
card.getVirtual_id(),
card.getOperators(),
card.getAccount_open_time(),
card.getActive_time(),
card.getIs_real_name(),
card.getOn_off(),
card.getThreshold(),
card.getNet_status(),
card.getSms_status(),
card.getVoice_status(),
card.getOnline_state(),
card.getRegion_limit(),
card.getUpdate_status_time(),
card.getGet_status_time(),
card.getOfficial_status(),
card.getStatus(),
card.getStatus_push_time(),
card.getCycle_type(),
card.getOfficial_code(),
card.getVersion(),
card.getCreate_time(),
card.getIs_delete(),
card.getModify_time(),
card.getExcess_consume(),
card.getActivate_way(),
card.getHalt_way(),
card.getOfficial_halt_cause(),
card.getSecondary_status(),
card.getIs_limit(),
card.getActivation_cause(),
card.getLimit_speed_value(),
card.getAhead_halt(),
card.getOpen_switch_polling(),
card.getCard_ascription_type(),
card.getIs_limit_control(),
card.getUpdata_limit_control_time(),
card.get_sign()
};
if(buffer_switch_tag == 1){
bufferList0_1.add(one);
}else{
bufferList0_2.add(one);
}
batchUpdate2Ch();
//int rt = ch_template.update(insertSql, one);
//System.out.println("=========>"+rt);
} catch (Exception e) {
e.printStackTrace();
if (!context.getConfig().ignoreProducerError) {
throw new RuntimeException(e);
}
}
if (r.isTXCommit()) {
context.setPosition(r.getNextPosition());
}
}
private synchronized void batchUpdate2Ch() {
if(buffer_switch_tag == 1)
{
if(System.currentTimeMillis() - flushTime >= 5000 && bufferList0_1.size()>0)
{
buffer_switch_tag = 2;
ch_template.batchUpdate(insertSql,bufferList0_1);
bufferList0_1.clear();
}
}else{
if(System.currentTimeMillis() - flushTime >= 5000 && bufferList0_2.size()>0)
{
buffer_switch_tag = 1;
ch_template.batchUpdate(insertSql,bufferList0_2);
bufferList0_2.clear();
}
}
flushTime = System.currentTimeMillis();
}
}
public static JdbcTemplate getClickHouseTemplate(){
try {
//1.加载配置文件
Map<String, Object> map = new HashMap();
map.put("url", "jdbc:clickhouse://localhost:8123/?socket_timeout=7200000&send_timeout=3600&receive_timeout=3600&distributed_ddl_task_timeout=4800");
map.put("username", "root");
map.put("password", "123123");
map.put("initialSize", "3");
map.put("maxActive", "10");
map.put("driverClassName", "ru.yandex.clickhouse.ClickHouseDriver");
//2.初始化连接池对象
DataSource ds = DruidDataSourceFactory.createDataSource(map);
JdbcTemplate template = new JdbcTemplate(ds);
return template;
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
5) maven生成jar文件,在将jar文件拷贝到maxwell/lib目录当中;相当于是定制化的Maxwell的插件
6) maxwell配置启动文件 properties
# tl;dr config java控制台输出System.out.print() 级别是 info
log_level=info
# 配置生产者
producer=custom_producer
custom_producer.factory=org.example.CustomProducerFactory
# 下面是clickhouse的配置信息
custom_producer.host=localhost
custom_producer.port=8123
custom_producer.db=xxx_db
custom_producer.user=root
custom_producer.password=123123
# 下面是mysql的配置信息
host=localhost
port=8621
user=xxx_mydb
password=123123
# 下面是数据库和表的过滤信息,就是哪些库、哪些表需要同步的
#filter='exclude: *.*, include: mp-connect.connect_card_1'
filter= exclude: *.*, include: mp-connect./connect_card_\\d+/
bootstrapper=sync
7) maxwell启动
./bin/maxwell --filter='exclude: *.*, include: mydb.xxx_table_01'
# 未配置过滤器的直接使用这个
./bin/maxwell
8)其他参考资料
centos7部署clickhouse及mysql到ck的数据同步(maxwell方案) - 简书
Mysql数据到Clickhouse同步方案 · 电信-研发中心
https://mvnrepository.com/artifact/org.jgroups/jgroups-raft
Release v1.26.2 · zendesk/maxwell · GitHub
9)异常处理
maxwell报“Could not find first log file name in binary log index file ”
truncate table maxwell.positions 后重新启动即可