Generate the JSON job file from the parameters
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": [
                            "table column 1",
                            "table column 2"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": [
                                    "database jdbc url"
                                ],
                                "table": [
                                    "table name"
                                ]
                            }
                        ],
                        "password": "***",
                        "username": "database username",
                        "where": "1=1"
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "column": [
                            {
                                "name": "1",
                                "type": "STRING"
                            },
                            {
                                "name": "2",
                                "type": "STRING"
                            }
                        ],
                        "compress": "GZIP",
                        "defaultFS": "hdfs://ns",
                        "fieldDelimiter": "\t",
                        "fileName": "webloged2abea76c494576a381b99255ef5e5c",
                        "fileType": "TEXT",
                        "hadoopConfig": {
                            "dfs.client.failover.proxy.provider.ns": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
                            "dfs.ha.namenodes.ns": "nn1,nn2",
                            "dfs.namenode.rpc-address.ns.nn1": "node-1:9000",
                            "dfs.namenode.rpc-address.ns.nn2": "node-2:9000",
                            "dfs.nameservices": "ns"
                        },
                        "path": "/hdfs/ag_admin_v19b690c412a204a9b95133db18631d4aa",
                        "writeMode": "append"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
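Since this step says the JSON is generated from parameters, a minimal Python sketch of that generation could look like the following. The parameter names and the sample values under __main__ are assumptions for illustration, not values from the original job:

# generate_job.py -- build a DataX job config from parameters and write weblogtest.json.
# A minimal sketch: the sample values passed in under __main__ are placeholders.
import json


def build_job(columns, jdbc_url, table, username, password, hdfs_path, file_name):
    return {
        "job": {
            "content": [{
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": columns,
                        "connection": [{"jdbcUrl": [jdbc_url], "table": [table]}],
                        "username": username,
                        "password": password,
                        "where": "1=1",
                    },
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        # every target column is written as STRING, as in the config above;
                        # the hadoopConfig HA block from the config above would be added here too
                        "column": [{"name": c, "type": "STRING"} for c in columns],
                        "compress": "GZIP",
                        "defaultFS": "hdfs://ns",
                        "fieldDelimiter": "\t",
                        "fileName": file_name,
                        "fileType": "TEXT",
                        "path": hdfs_path,
                        "writeMode": "append",
                    },
                },
            }],
            "setting": {"speed": {"channel": "1"}},
        }
    }


if __name__ == "__main__":
    job = build_job(
        columns=["id", "time"],
        jdbc_url="jdbc:mysql://host:3306/db",  # placeholder URL
        table="weblog",                        # placeholder source table
        username="user",
        password="***",
        hdfs_path="/hdfs/ag_admin_v19b690c412a204a9b95133db18631d4aa",
        file_name="webloged2abea76c494576a381b99255ef5e5c",
    )
    with open("weblogtest.json", "w") as f:
        json.dump(job, f, ensure_ascii=False, indent=4)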
Run the DataX job
python /bigdata/datax/bin/datax.py weblogtest.json
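If this step is scripted rather than run by hand, a small sketch (assuming the generated weblogtest.json sits in the working directory) could invoke the same command from Python and stop on a non-zero exit code:

# run_job.py -- run the DataX job generated above and fail loudly if it errors out.
import subprocess
import sys

result = subprocess.run(["python", "/bigdata/datax/bin/datax.py", "weblogtest.json"])
if result.returncode != 0:
    sys.exit("DataX job failed with exit code %d" % result.returncode)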
Create a temporary Hive external table
-- LOCATION points at the same path that DataX wrote to, so there is no need to look up the generated file name
create external table if not exists weblog(id string, time string)
row format delimited fields terminated by '\t'
stored as textfile
location '/hdfs/ag_admin_v19b690c412a204a9b95133db18631d4aa';
Load the data from the temporary table into the standard table
-- The temporary table has fewer columns than the standard table, so the missing ones are padded with the string "null"; keep the column order right. The standard table is a Hive managed (internal) table
insert into biao_zhun_biao select "null",id,"null",question,"null",time,"null","null","null" from weblog_textfile;