package com.mengyao.bigdata.hbase;

import java.io.IOException;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Bulk-loads user records into HBase by generating HFiles with a map-only
 * MapReduce job, then completing the load with {@code LoadIncrementalHFiles}.
 *
 * Built against HBase-1.0.1.1 / Hadoop-2.6.0.
 *
 * @author mengyao
 */
public classBulkLoadApp {private static Configuration conf =HBaseConfiguration.create();private staticString inPath;private staticString outPath;private staticString tableName;static{
conf.set("hbase.zookeeper.quorum", "bdata200,bdata202,bdata203");
conf.set("hbase.zookeeper.property.clientPort", "2181");
}static class BulkLoadMapper extends Mapper{privateImmutableBytesWritable row;
@Overrideprotected void map(LongWritable key, Text value, Context context) throwsIOException, InterruptedException {
String line=value.toString();//id,username,email,birthday,mobile,phone,modified
String[] fields = line.split("\t");
String id= fields[0];
String username= fields[1];
String mail= fields[2];
String birthday= fields[3];
String mobile= fields[4];
String phone= fields[5];
String regtime= fields[6];
String rowKey=DigestUtils.md5Hex(id);
row= newImmutableBytesWritable(Bytes.toBytes(rowKey));
Put put= newPut(Bytes.toBytes(rowKey), System.currentTimeMillis());if (!StringUtils.isEmpty(id)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("id"), Bytes.toBytes(id));
}if (!StringUtils.isEmpty(username)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("username"), Bytes.toBytes(username));
}if (!StringUtils.isEmpty(mail)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("mail"), Bytes.toBytes(mail));
}if (!StringUtils.isEmpty(birthday) || !birthday.equals("0000-00-00")) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("birthday"), Bytes.toBytes(birthday));
}if (!StringUtils.isEmpty(mobile)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("mobile"), Bytes.toBytes(mobile));
}if (!StringUtils.isEmpty(phone)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("phone"), Bytes.toBytes(phone));
}if (!StringUtils.isEmpty(regtime)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("modified"), Bytes.toBytes(regtime));
}
context.write(row, put);
}
}static int createJob(String[] args) throwsException {
inPath= args[0];
outPath= args[1];
tableName= args[2];
Connection connection=ConnectionFactory.createConnection(conf);
Table table=connection.getTable(TableName.valueOf(tableName));
Job job=Job.getInstance(conf);
job.setJarByClass(BulkLoadApp.class);
job.setMapperClass(BulkLoadMapper.class);
job.setNumReduceTasks(0);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
job.setOutputFormatClass(HFileOutputFormat2.class);
HFileOutputFormat2.configureIncrementalLoad(job, table, connection.getRegionLocator(TableName.valueOf(tableName)));
FileInputFormat.addInputPath(job,newPath(inPath));
FileOutputFormat.setOutputPath(job,newPath(outPath));return job.waitForCompletion(true)?0:1;
}/*** use commond:
* 1、hadoop jar MyJar INPUT_FILE OUTPUT_DIR TABLE_NAME
* hadoop jar bigdata.jar /tag/data/user/haier_user.csv /tag/data/user/haier_user_out tbl_shopuser
* 2、hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles OUTPUT_DIR TABLE_NAME
* hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles /tag/data/user/haier_user_out tbl_shopuser
*@paramargs
*@throwsException*/@SuppressWarnings("deprecation")public static void main(String[] args) throwsException {if (args.length!=3) {
System.out.println("Usage: "+BulkLoadApp.class.getName()+" Input paramters ");
}else{int status =createJob(args);if (status == 0) {
LoadIncrementalHFiles loadHFiles= newLoadIncrementalHFiles(conf);
loadHFiles.doBulkLoad(new Path(outPath), newHTable(conf, Bytes.toBytes(tableName)));
}
System.exit(status);
}
}
}