Business workflow overview

Generate the JSON job file from the parameters. Note that DataX requires the content and setting sections to be wrapped in a top-level "job" object:

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": [
                            "column_1",
                            "column_2"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": [
                                    "jdbc_url"
                                ],
                                "table": [
                                    "table_name"
                                ]
                            }
                        ],
                        "password": "***",
                        "username": "db_username",
                        "where": "1=1"
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "column": [
                            {
                                "name": "column_1",
                                "type": "STRING"
                            },
                            {
                                "name": "column_2",
                                "type": "STRING"
                            }
                        ],
                        "compress": "GZIP",
                        "defaultFS": "hdfs://ns",
                        "fieldDelimiter": "\t",
                        "fileName": "webloged2abea76c494576a381b99255ef5e5c",
                        "fileType": "TEXT",
                        "hadoopConfig": {
                            "dfs.client.failover.proxy.provider.ns": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
                            "dfs.ha.namenodes.ns": "nn1,nn2",
                            "dfs.namenode.rpc-address.ns.nn1": "node-1:9000",
                            "dfs.namenode.rpc-address.ns.nn2": "node-2:9000",
                            "dfs.nameservices": "ns"
                        },
                        "path": "/hdfs/ag_admin_v19b690c412a204a9b95133db18631d4aa",
                        "writeMode": "append"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": 1
            }
        }
    }
}
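
Since this file is assembled from parameters, it is natural to generate it with a small script. Below is a minimal Python sketch; build_datax_job and its parameter names are illustrative, and the HA hadoopConfig block from above is omitted for brevity (it would be passed through unchanged):

import json

def build_datax_job(columns, jdbc_url, table, username, password,
                    hdfs_path, file_name):
    # Assemble the reader/writer pair shown above from the given parameters.
    return {
        "job": {
            "content": [{
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": columns,
                        "connection": [{"jdbcUrl": [jdbc_url], "table": [table]}],
                        "username": username,
                        "password": password,
                        "where": "1=1"
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        # hdfswriter wants an explicit name/type for every column.
                        "column": [{"name": c, "type": "STRING"} for c in columns],
                        "compress": "GZIP",
                        "defaultFS": "hdfs://ns",
                        "fieldDelimiter": "\t",
                        "fileName": file_name,
                        "fileType": "TEXT",
                        "path": hdfs_path,
                        "writeMode": "append"
                    }
                }
            }],
            "setting": {"speed": {"channel": 1}}
        }
    }

with open("weblogtest.json", "w") as f:
    json.dump(build_datax_job(["column_1", "column_2"], "jdbc_url", "table_name",
                              "db_username", "***",
                              "/hdfs/ag_admin_v19b690c412a204a9b95133db18631d4aa",
                              "webloged2abea76c494576a381b99255ef5e5c"),
              f, indent=4)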

Run the DataX job

python /bigdata/datax/bin/datax.py weblogtest.json
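
When the whole flow is scripted, the same command can be launched from Python and checked for failure. A sketch, assuming the datax.py path shown above:

import subprocess

# Launch DataX and fail fast if the job exits non-zero.
result = subprocess.run(
    ["python", "/bigdata/datax/bin/datax.py", "weblogtest.json"],
    capture_output=True, text=True)
if result.returncode != 0:
    raise RuntimeError("DataX job failed:\n" + result.stderr)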

Create a temporary Hive external table

-- LOCATION points at the same path DataX wrote to, so there is no need
-- to look up the generated file names.
create external table if not exists weblog(
    id string,
    question string,
    `time` string)
row format delimited fields terminated by '\t'
stored as textfile
location '/hdfs/ag_admin_v19b690c412a204a9b95133db18631d4aa';
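
A quick sanity check that the external table actually sees the files DataX wrote (time is back-quoted in case your Hive version treats it as a reserved word):

select id, question, `time` from weblog limit 10;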

Load the data from the temporary table into the standard table

-- The temporary table has fewer fields than the standard table, so the
-- missing ones are padded with the string "null"; mind the column order.
-- The standard table is a managed (internal) table.
insert into biao_zhun_biao
select "null", id, "null", question, "null", `time`, "null", "null", "null"
from weblog;
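
One way to verify the load, assuming biao_zhun_biao was empty beforehand, is to compare row counts; the two queries should return the same number:

select count(*) from weblog;
select count(*) from biao_zhun_biao;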

Drop the temporary table as needed. Since weblog is an external table, dropping it only removes the Hive metadata; the files under the HDFS path remain and must be deleted separately if they are no longer wanted.
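
The corresponding cleanup, if the data is no longer needed:

drop table if exists weblog;
hdfs dfs -rm -r /hdfs/ag_admin_v19b690c412a204a9b95133db18631d4aa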
