datax自定义处理器Transformer

问题:

使用 DataX 同步数据时,对方 Oracle 库的字符集编码为 ISO_8859_1/ASCII,即使在连接串中设置了字符集,传输后的数据仍然是乱码。

解决办法:

自定义Transformer类

GbkIsConvertToAsciiTransformer.java
package com.alibaba.datax.core.transport.transformer;

import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.transformer.Transformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;


/**
 * Transformer registered under the name {@code dx_ConvertToAscii}.
 *
 * <p>For every STRING column of a record, the value is encoded to GBK bytes and those bytes
 * are then decoded as ISO-8859-1, so each GBK byte becomes exactly one character. The
 * resulting string is what gets written by the downstream writer. A consumer can recover the
 * original text with {@code new String(value.getBytes(ISO_8859_1), "GBK")}.
 *
 * <p>Despite the class name, no ASCII conversion takes place; the target charset is
 * ISO-8859-1.
 *
 * <p>The {@code paras} passed to {@link #evaluate(Record, Object...)} are not consulted:
 * all STRING columns are converted, regardless of any {@code columnIndex} in the job file.
 */
public class GbkIsConvertToAsciiTransformer extends Transformer {
    private static final Logger LOG = LoggerFactory.getLogger(GbkIsConvertToAsciiTransformer.class);

    /** Name of the charset the incoming text is encoded with before being re-decoded. */
    private static final String SOURCE_CHARSET_NAME = "GBK";

    public GbkIsConvertToAsciiTransformer() {
        setTransformerName("dx_ConvertToAscii");
    }

    /**
     * Re-encodes every non-null STRING column of {@code record} in place.
     *
     * @param record the record to transform; may be {@code null}
     * @param paras  transformer parameters from the job configuration (ignored)
     * @return the same {@code record} instance, or {@code null} if {@code record} was null
     */
    @Override
    public Record evaluate(Record record, Object... paras) {
        if (record == null) {
            return null;
        }

        for (int i = 0; i < record.getColumnNumber(); i++) {
            Column column = record.getColumn(i);
            if (column == null || column.getType() != Column.Type.STRING) {
                continue;
            }

            String oriValue = column.asString();
            if (oriValue == null) {
                // Keep null values as they are. Previously a null ended up as "" because the
                // resulting NullPointerException was swallowed inside the conversion helper.
                continue;
            }

            record.setColumn(i, new StringColumn(convertGbkToAscii(oriValue)));
        }
        return record;
    }

    /**
     * Encodes {@code gbkString} as GBK and reinterprets the bytes as ISO-8859-1 text.
     *
     * <p>The charset is looked up on each call rather than in a static initializer so that a
     * JRE without GBK support fails the conversion itself instead of failing class loading.
     * Such a failure now propagates to the caller instead of silently producing an empty
     * string.
     *
     * @param gbkString non-null text to convert
     * @return a string with one ISO-8859-1 character per GBK byte of the input
     * @throws java.nio.charset.UnsupportedCharsetException if the JRE has no GBK charset
     */
    private String convertGbkToAscii(String gbkString) {
        Charset gbk = Charset.forName(SOURCE_CHARSET_NAME);
        String isoString = new String(gbkString.getBytes(gbk), StandardCharsets.ISO_8859_1);
        // Logged per cell, so kept at DEBUG to avoid flooding the job log with row data.
        LOG.debug("\n 转换之前的值:{}, \n 转换之后的值{}", gbkString, isoString);
        return isoString;
    }
}

在TransformerRegistry中注册

package com.alibaba.datax.core.transport.transformer;

import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.core.util.container.CoreConstant;
import com.alibaba.datax.core.util.container.JarLoader;
import com.alibaba.datax.transformer.ComplexTransformer;
import com.alibaba.datax.transformer.Transformer;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Static registry mapping a transformer name to its {@link TransformerInfo}.
 *
 * <p>The built-in ("native") transformers are registered when this class is initialized.
 * Additional transformers can be loaded from the sub-directories of
 * {@code CoreConstant.DATAX_STORAGE_TRANSFORMER_HOME}; each such directory is expected to
 * contain a {@code transformer.json} naming the implementation class.
 *
 * <p>Naming rule enforced on registration: native transformer names start with
 * {@code dx_}, all other transformer names must not.
 *
 * Created by liqiang on 16/3/3.
 */
public class TransformerRegistry {

    private static final Logger LOG = LoggerFactory.getLogger(TransformerRegistry.class);
    private static Map<String, TransformerInfo> transformersByName = new HashMap<String, TransformerInfo>();

    static {
        // Register the native transformers. Transformers from local storage or from a
        // server are loaded later, on demand.
        registTransformer(new SubstrTransformer());
        registTransformer(new PadTransformer());
        registTransformer(new ReplaceTransformer());
        registTransformer(new FilterTransformer());
        registTransformer(new GroovyTransformer());
        registTransformer(new DigestTransformer());
        // Register our own custom transformer.
        registTransformer(new GbkIsConvertToAsciiTransformer());
    }

    /**
     * Loads every transformer found in the local storage directory.
     */
    public static void loadTransformerFromLocalStorage() {
        loadTransformerFromLocalStorage(null);
    }

    /**
     * Loads transformers from the local storage directory.
     *
     * @param transformers names of the transformers to load; {@code null} loads all of them
     */
    public static void loadTransformerFromLocalStorage(List<String> transformers) {
        final String[] entries = new File(CoreConstant.DATAX_STORAGE_TRANSFORMER_HOME).list();
        if (entries == null) {
            // Nothing to list for the storage path.
            return;
        }

        for (final String entry : entries) {
            try {
                final boolean requested = transformers == null || transformers.contains(entry);
                if (!requested) {
                    continue;
                }
                loadTransformer(entry);
            } catch (Exception e) {
                // One broken transformer must not prevent the remaining ones from loading.
                LOG.error(String.format("skip transformer(%s) loadTransformer has Exception(%s)", entry, e.getMessage()), e);
            }
        }
    }

    /**
     * Loads and registers the transformer stored in the directory named {@code each}.
     *
     * <p>Any problem (unreadable {@code transformer.json}, missing class name, class that
     * cannot be loaded or instantiated, class of an unsupported type) is logged and the
     * transformer is skipped. The directory name, not the {@code name} entry of the JSON
     * file, becomes the transformer name.
     *
     * @param each directory name of the transformer below the storage home
     */
    public static void loadTransformer(String each) {
        final String transformerPath = CoreConstant.DATAX_STORAGE_TRANSFORMER_HOME + File.separator + each;

        final Configuration config;
        try {
            config = Configuration.from(new File(transformerPath + File.separator + "transformer.json"));
        } catch (Exception e) {
            LOG.error(String.format("skip transformer(%s),load transformer.json error, path = %s, ", each, transformerPath), e);
            return;
        }

        final String className = config.getString("class");
        if (StringUtils.isEmpty(className)) {
            LOG.error(String.format("skip transformer(%s),class not config, path = %s, config = %s", each, transformerPath, config.beautify()));
            return;
        }

        final String declaredName = config.getString("name");
        if (!each.equals(declaredName)) {
            LOG.warn(String.format("transformer(%s) name not match transformer.json config name[%s], will ignore json's name, path = %s, config = %s", each, declaredName, transformerPath, config.beautify()));
        }

        final JarLoader jarLoader = new JarLoader(new String[]{transformerPath});
        try {
            final Object instance = jarLoader.loadClass(className).newInstance();

            if (instance instanceof ComplexTransformer) {
                final ComplexTransformer complex = (ComplexTransformer) instance;
                complex.setTransformerName(each);
                registComplexTransformer(complex, jarLoader, false);
                return;
            }

            if (instance instanceof Transformer) {
                final Transformer simple = (Transformer) instance;
                simple.setTransformerName(each);
                registTransformer(simple, jarLoader, false);
                return;
            }

            LOG.error(String.format("load Transformer class(%s) error, path = %s", className, transformerPath));
        } catch (Exception e) {
            // A faulty function is skipped.
            LOG.error(String.format("skip transformer(%s),load Transformer class error, path = %s ", each, transformerPath), e);
        }
    }

    /**
     * Looks up a registered transformer.
     *
     * @param transformerName name the transformer was registered under
     * @return the registered info, or {@code null} if no transformer has that name
     */
    public static TransformerInfo getTransformer(String transformerName) {
        // TODO: on a miss, try loading the transformer from disk.
        return transformersByName.get(transformerName);
    }

    /**
     * Registers a native transformer that has no dedicated class loader.
     *
     * @param transformer transformer whose name must start with {@code dx_}
     */
    public static synchronized void registTransformer(Transformer transformer) {
        registTransformer(transformer, null, true);
    }

    /**
     * Registers a simple transformer, wrapped in a {@link ComplexTransformerProxy}.
     *
     * @param transformer transformer to register
     * @param classLoader class loader recorded alongside the transformer; may be {@code null}
     * @param isNative    whether the transformer is a native one
     * @throws DataXException if the name violates the naming rule or is already registered
     */
    public static synchronized void registTransformer(Transformer transformer, ClassLoader classLoader, boolean isNative) {
        final String name = transformer.getTransformerName();
        ensureRegistrable(name, isNative);

        final ComplexTransformer proxy = new ComplexTransformerProxy(transformer);
        transformersByName.put(name, newTransformerInfo(proxy, isNative, classLoader));
    }

    /**
     * Registers a complex transformer as is.
     *
     * @param complexTransformer transformer to register
     * @param classLoader        class loader recorded alongside the transformer; may be {@code null}
     * @param isNative           whether the transformer is a native one
     * @throws DataXException if the name violates the naming rule or is already registered
     */
    public static synchronized void registComplexTransformer(ComplexTransformer complexTransformer, ClassLoader classLoader, boolean isNative) {
        final String name = complexTransformer.getTransformerName();
        ensureRegistrable(name, isNative);

        transformersByName.put(name, newTransformerInfo(complexTransformer, isNative, classLoader));
    }

    /**
     * Validates a name before registration: first the {@code dx_} prefix rule, then uniqueness.
     */
    private static void ensureRegistrable(String name, boolean isNative) {
        // Native names carry the "dx_" prefix and non-native names must not, so the name is
        // valid exactly when "has prefix" equals "is native".
        final boolean hasNativePrefix = name.startsWith("dx_");
        if (hasNativePrefix != isNative) {
            throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_NAME_ERROR, " name=" + name + ": isNative=" + isNative);
        }

        if (transformersByName.containsKey(name)) {
            throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_DUPLICATE_ERROR, " name=" + name);
        }
    }

    /**
     * Bundles a transformer with its class loader and native flag.
     */
    private static TransformerInfo newTransformerInfo(ComplexTransformer complexTransformer, boolean isNative, ClassLoader classLoader) {
        final TransformerInfo info = new TransformerInfo();
        info.setClassLoader(classLoader);
        info.setIsNative(isNative);
        info.setTransformer(complexTransformer);
        return info;
    }

    /**
     * Returns the names of all registered transformers.
     *
     * @return a new list holding a snapshot of the registered names
     */
    public static List<String> getAllSuportTransformer() {
        final List<String> names = new ArrayList<String>(transformersByName.keySet());
        return names;
    }
}
job.json
{
	"job": {
		"setting": {
			"speed": {
				"byte": 10485760
			}
		},
		"content": [{
			"reader": {
				"name": "mysqlreader",
				"parameter": {
					"username": "${rusername}",
					"password": "${rpassword}",
					"connection": [{
						"jdbcUrl": ["jdbc:mysql://192.168.1.9:3928/test?useSSL=false"],
						"querySql": [
							"SELECT '大渡桥横铁索寒。更喜岷山千里雪,三军过后尽开颜。' as test, '大渡桥横铁索寒。更喜岷山千里雪,' as TEST1, '三军过后尽开颜。' as TEST2, '你好' as TEST3,'测试' as TEST4 FROM test.test WHERE 1=1 LIMIT 1 "
						]
					}]
				}
			},
			"writer": {
				"name": "oraclewriter",
				"parameter": {
					"batchSize": 1024,
					"username": "${wusername}",
					"password": "${wpassword}",
					"column": ["TEST", "TEST1", "TEST2", "TEST3", "TEST4"],
					"connection": [{
						"jdbcUrl": "jdbc:oracle:thin:@(DESCRIPTION = (ADDRESS = (PROTOCOL = TCP)(HOST = 192.168.1.3)(PORT = 1521)) (ADDRESS = (PROTOCOL = TCP)(HOST = 192.168.1.5)(PORT = 1521)) (LOAD_BALANCE = yes)(failover=on)(connect_data= (service_name = orcl)))",
						"table": [
							"TEST"
						]
					}]
				}
			},
			//新增节点
			"transformer": [{
				//name 必须与自定义 Transformer 构造函数中通过 setTransformerName("dx_ConvertToAscii") 设置的名字一致
	            "name": "dx_ConvertToAscii",
                "parameter": {
				   "columnIndex":1
				}
            }]
		}]
	}
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值