HBase 中的建表语句(此步骤可选;若命名空间 ns1 尚不存在,需先在 hbase shell 中执行 create_namespace 'ns1')
# 建表
create 'ns1:product','f1'
# put 记录
put 'ns1:product','000','f1:pkid','5000'
put 'ns1:product','000','f1:addtime','2020-09-15'
put 'ns1:product','000','f1:kind','00010007'
put 'ns1:product','000','f1:kindname','红葡萄酒'
put 'ns1:product','000','f1:model','200600127'
put 'ns1:product','000','f1:productname','力士金庄园2006'
put 'ns1:product','000','f1:price1','450'
put 'ns1:product','000','f1:pipai','白马庄'
HbaseWriter 插件实现了向 HBase 中写入数据。在底层实现上,HbaseWriter 通过 HBase 的 Java 客户端连接远程 HBase 服务,并通过 put 方式将数据写入 HBase。
MysqlReader插件官方文档,有介绍各个参数的说明
https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md
Hbase094XWriter & Hbase11XWriter 插件文档,参数说明
https://github.com/alibaba/DataX/blob/master/hbase11xwriter/doc/hbase11xwriter.md
代码示例,编写文件 mysql_hbase.json
部分参数说明
1 Hbase11XWriter 中的 versionColumn 节点为可选项;如果不配置,系统会自动使用当前时间戳作为版本。下面的写法中 index 为 -1 表示使用 value 指定的固定时间戳作为版本:
"versionColumn": {"index": -1,
"value": "123456789"},
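2 Hbase11XWriter 中的 rowkeyColumn 选项用于指定生成 rowkey 的规则:index 是 reader 端 column 的下标(从 0 开始);index 为 -1 表示常量,取 value 的值,常用作多个字段之间的拼接符。
比如这里的规则是 pkid列_kind列_model列(即下标 0、2、4 三列,用 "_" 拼接)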
{
"job": {
"setting": {
"speed": {
"channel": 1
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": [
"pkid",
"addtime",
"kind",
"kindname",
"model",
"productname",
"price1",
"pipai"
],
"connection": [
{
"jdbcUrl": ["jdbc:mysql://158.158.4.52:3306/linshi_1"],
"table": ["view_product_copy"]
}
],
"password": "kn20180806",
"username": "root",
"where": ""
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "kncloud02:2181,kncloud03:2181,kncloud04:2181",
"hbase.cluster.distributed": true,
"zookeeper.znode.parent": "/hbase-unsecure"
},
"table": "ns1:product",
"mode": "normal",
"rowkeyColumn": [
{
"index": 0,
"type": "string"
},
{
"index": -1,
"type": "string",
"value": "_"
},
{
"index": 2,
"type": "string"
},
{
"index": -1,
"type": "string",
"value": "_"
},
{
"index": 4,
"type": "string"
}
],
"column": [
{
"index": 0,
"name": "f1:pkid",
"type": "string"
},
{
"index": 1,
"name": "f1:addtime",
"type": "string"
},
{
"index": 2,
"name": "f1:kind",
"type": "string"
},
{
"index": 3,
"name": "f1:kindname",
"type": "string"
},
{
"index": 4,
"name": "f1:model",
"type": "string"
},
{
"index": 5,
"name": "f1:productname",
"type": "string"
},
{
"index": 6,
"name": "f1:price1",
"type": "string"
},
{
"index": 7,
"name": "f1:pipai",
"type": "string"
}
],
"versionColumn": {
"index": -1,
"value": "123456789"
},
"encoding": "utf-8"
}
}
}
]
}
}
进入datax目录,运行datax命令,开始执行
python bin/datax.py job/mysql_hbase.json
测试用,多版本插入数据 (方便下个文章使用而已)
{
"job": {
"setting": {
"speed": {
"channel": 1
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": [
"productId",
"title",
"num",
"addTime"
],
"connection": [
{
"jdbcUrl": ["jdbc:mysql://18.18.4.2:3306/linshi_1"],
"table": ["e_product"]
}
],
"password": "n006",
"username": "root",
"where": ""
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "kncloud02:2181,kncloud03:2181,kncloud04:2181",
"hbase.cluster.distributed": true,
"zookeeper.znode.parent": "/hbase-unsecure"
},
"table": "ns1:product",
"mode": "normal",
"rowkeyColumn": [
{
"index": 0,
"type": "string"
},
{
"index": -1,
"type": "string",
"value": "_"
},
{
"index": 1,
"type": "string"
}
],
"column": [
{
"index": 0,
"name": "f1:productId",
"type": "string"
},
{
"index": 1,
"name": "f1:title",
"type": "string"
},
{
"index": 2,
"name": "f1:num",
"type": "string"
},
{
"index": 3,
"name": "f1:addTime",
"type": "string"
}
],
"versionColumn": {
"index": -1,
"value": "22222222222222"
},
"encoding": "utf-8"
}
}
}
]
}
}