目录
2.1 上传hive驱动到rdbmsReader、rdbmsWriter插件的lib目录
2.3 从git clone源码 并在最外层pom.xml注释掉其他组件,只保留 rdbmswriter模块
2.5.1 新增CommonRdbmsWriterOverride.java类(替代CommonRdbmsWriter类)
2.5.3 修改SubCommonRdbmsWriter类为
一. 背景
经测试官方rdbms插件,不支持直接插入hive表
二次开发思路
使用一条insert sql 插入多条数据,类似
insertSql: insert into test_databases.test_partition_sink values(?,?,?,?),(?,?,?,?),(?,?,?,?),(?,?,?,?)
二次开发步骤
2.1 上传hive驱动到rdbmsReader、rdbmsWriter插件的lib目录
hive-jdbc-3.1.1-standalone.jar or hive-jdbc-uber-2.6.5.0-292.jar
[cdp2dev_deloitte@emr-header-1 libs]$ pwd
/home/cdp2dev_deloitte/tmp/jack/datax/plugin/writer/rdbmsreader/libs
[cdp2dev_deloitte@emr-header-1 libs]$ ls
commons-collections-3.0.jar db2jcc4.jar guava-r05.jar logback-core-1.0.13.jar
commons-io-2.4.jar Dm7JdbcDriver16.jar hamcrest-core-1.3.jar plugin-rdbms-util-0.0.1-SNAPSHOT.jar
commons-lang3-3.3.2.jar druid-1.0.15.jar hive-jdbc-3.1.1-standalone.jar slf4j-api-1.7.10.jar
commons-math3-3.1.1.jar edb-jdbc16.jar jconn3-1.0.0-SNAPSHOT.jar
datax-common-0.0.1-SNAPSHOT.jar fastjson-1.1.46.sec10.jar logback-classic-1.0.13.jar
[cdp2dev_deloitte@emr-header-1 libs]$ pwd
/home/cdp2dev_deloitte/tmp/jack/datax/plugin/writer/rdbmswriter/libs
[cdp2dev_deloitte@emr-header-1 libs]$ ls
commons-collections-3.0.jar db2jcc4.jar guava-r05.jar logback-core-1.0.13.jar
commons-io-2.4.jar Dm7JdbcDriver16.jar hamcrest-core-1.3.jar plugin-rdbms-util-0.0.1-SNAPSHOT.jar
commons-lang3-3.3.2.jar druid-1.0.15.jar hive-jdbc-3.1.1-standalone.jar slf4j-api-1.7.10.jar
commons-math3-3.1.1.jar edb-jdbc16.jar jconn3-1.0.0-SNAPSHOT.jar
datax-common-0.0.1-SNAPSHOT.jar fastjson-1.1.46.sec10.jar logback-classic-1.0.13.jar
2.2 修改plugin.json配置文件
[cdp2dev_deloitte@emr-header-1 rdbmswriter]$ pwd
/home/cdp2dev_deloitte/tmp/jack/datax/plugin/writer/rdbmswriter
[cdp2dev_deloitte@emr-header-1 rdbmswriter]$ ls
libs plugin_job_template.json plugin.json rdbmswriter-0.0.1-SNAPSHOT.jar
#在drivers里面加入org.apache.hive.jdbc.HiveDriver
[cdp2dev_deloitte@emr-header-1 rdbmswriter]$ vi plugin.json
{
"name": "rdbmswriter",
"class": "com.alibaba.datax.plugin.reader.rdbmswriter.RdbmsWriter",
"description": "useScene: prod. mechanism: Jdbc connection using the database, execute select sql, retrieve data from the ResultSet. warn: The more you know about the database, the less problems you encounter.",
"developer": "alibaba",
"drivers":["dm.jdbc.driver.DmDriver", "com.sybase.jdbc3.jdbc.SybDriver", "com.edb.Driver","org.apache.hive.jdbc.HiveDriver"]
}
2.3 从git clone源码 并在最外层pom.xml注释掉其他组件,只保留 rdbmswriter模块
2.4 原码测试能否编译打包
2.5 修改源码
2.5.1 新增CommonRdbmsWriterOverride.java类(替代CommonRdbmsWriter类)
package com.alibaba.datax.plugin.reader.rdbmswriter;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.plugin.TaskPluginCollector;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.util.RdbmsException;
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
import com.alibaba.datax.plugin.rdbms.writer.Constant;
import com.alibaba.datax.plugin.rdbms.writer.util.OriginalConfPretreatmentUtil;
import com.alibaba.datax.plugin.rdbms.writer.util.WriterUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Triple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.*;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
public class CommonRdbmsWriterOverride {
public CommonRdbmsWriterOverride() {
}
public static class Task {
protected static final Logger LOG = LoggerFactory.getLogger(CommonRdbmsWriter.Task.class);
protected DataBaseType dataBaseType;
private static final String VALUE_HOLDER = "?";
protected String username;
protected String password;
protected String jdbcUrl;
protected String table;
protected List<String> columns;
protected List<String> preSqls;
protected List<String> postSqls;
protected int batchSize;
protected int batchByteSize;
protected int columnNumber = 0;
protected TaskPluginCollector taskPluginCollector;
protected static String BASIC_MESSAGE;
protected static String INSERT_OR_REPLACE_TEMPLATE;
protected String writeRecordSql;
protected String writeMode;
protected boolean emptyAsNull;
protected Triple<List<String>, List<Integer>, List<String>> resultSetMetaData;
protected Integer totalIndex;
protected List<String> partitionColumns;
public Task(DataBaseType dataBaseType) {
this.dataBaseType = dataBaseType;
}
public void init(Configuration writerSliceConfig) {
this.username = writerSliceConfig.getString("username");
this.password = writerSliceConfig.getString("password");
this.jdbcUrl = writerSliceConfig.getString("jdbcUrl");
if (this.jdbcUrl.startsWith("||_dsc_ob10_dsc_||") && this.dataBaseType == DataBaseType.MySql) {
String[] ss = this.jdbcUrl.split("\\|\\|_dsc_ob10_dsc_\\|\\|");
if (ss.length != 3) {
throw DataXException.asDataXException(DBUtilErrorCode.JDBC_OB10_ADDRESS_ERROR, "JDBC OB10格式错误,请联系askdatax");
}
LOG.info("this is ob1_0 jdbc url.");
this.username = ss[1].trim() + ":" + this.username;
this.jdbcUrl = ss[2];
LOG.info("this is ob1_0 jdbc url. user=" + this.username + " :url=" + this.jdbcUrl);
}
this.table = writerSliceConfig.getString("table");
this.columns = writerSliceConfig.getList("column", String.class);
this.partitionColumns = writerSliceConfig.getList("partitionColumns", String.class);
this.columnNumber = this.columns.size();
this.preSqls = writerSliceConfig.getList("preSql", String.class);
this.postSqls = writerSliceConfig.getList("postSql", String.class);
this.batchSize = writerSliceConfig.getInt("batchSize", 2048);
this.batchByteSize = writerSliceConfig.getInt("batchByteSize", 33554432);
this.writeMode = writerSliceConfig.getString("writeMode", "INSERT");
this.emptyAsNull = writerSliceConfig.getBool("emptyAsNull", true);
INSERT_OR_REPLACE_TEMPLATE = writerSliceConfig.getString(Constant.INSERT_OR_REPLACE_TEMPLATE_MARK);
this.writeRecordSql = String.format(INSERT_OR_REPLACE_TEMPLATE, this.table);
BASIC_MESSAGE = String.format("jdbcUrl:[%s], table:[%s]", this.jdbcUrl, this.table);
}
public void prepare(Configuration writerSliceConfig) {
Connection connection = DBUtil.getConnection(this.dataBaseType, this.jdbcUrl, this.username, this.password);
DBUtil.dealWithSessionConfig(connection, writerSliceConfig, this.dataBaseType, BASIC_MESSAGE);
int tableNumber = writerSliceConfig.getInt(Constant.TABLE_NUMBER_MARK);
if (tableNumber != 1) {
LOG.info("Begin to execute preSqls:[{}]. context info:{}.", StringUtils.join(this.preSqls, ";"), BASIC_MESSAGE);
WriterUtil.executeSqls(connection, this.preSqls, BASIC_MESSAGE, this.dataBaseType);
}
DBUtil.closeDBResources((ResultSet) null, (Statement) null, connection);
}
public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCollector taskPluginCollector, Connection connection) {
this.taskPluginCollector = taskPluginCollector;
this.resultSetMetaData = DBUtil.getColumnMetaData(connection, this.table, StringUtils.join(this.columns, ","));
this.calcWriteRecordSql();
List<Record> writeBuffer = new ArrayList(this.batchSize);
int bufferBytes = 0;
boolean isBufferBytes;
try {
Record record;
while ((record = recordReceiver.getFromReader()) != null) {
if (record.getColumnNumber() != this.columnNumber) {
throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, String.format("列配置信息有错误. 因为您配置的任务中,源头读取字段数:%s 与 目的表要写入的字段数:%s 不相等. 请检查您的配置并作出修改.", record.getColumnNumber(), this.columnNumber));
}
writeBuffer.add(record);
bufferBytes += record.getMemorySize();
if (writeBuffer.size() >= this.batchSize || bufferBytes >= this.batchByteSize) {
this.doBatchInsert(connection, writeBuffer);
writeBuffer.clear();
bufferBytes = 0;
}
}
if (!writeBuffer.isEmpty()) {
this.doBatchInsert