package com.mengyao.bigdata.hbase;

import java.io.IOException;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Bulk-loads user records into HBase by generating HFiles with a map-only
 * MapReduce job, then completing the load with {@code LoadIncrementalHFiles}.
 *
 * Built against HBase-1.0.1.1 / Hadoop-2.6.0.
 *
 * @author mengyao
 */
public classBulkLoadApp {private static Configuration conf =HBaseConfiguration.create();private staticString inPath;private staticString outPath;private staticString tableName;static{
conf.set("hbase.zookeeper.quorum", "bdata200,bdata202,bdata203");
conf.set("hbase.zookeeper.property.clientPort", "2181");
}static class BulkLoadMapper extends Mapper{privateImmutableBytesWritable row;
@Overrideprotected void map(LongWritable key, Text value, Context context) throwsIOException, InterruptedException {
String line=value.toString();//id,username,email,birthday,mobile,phone,modified
String[] fields = line.split("\t");
String id= fields[0];
String username= fields[1];
String mail= fields[2];
String birthday= fields[3];
String mobile= fields[4];
String phone= fields[5];
String regtime= fields[6];
String rowKey=DigestUtils.md5Hex(id);
row= newImmutableBytesWritable(Bytes.toBytes(rowKey));
Put put= newPut(Bytes.toBytes(rowKey), System.currentTimeMillis());if (!StringUtils.isEmpty(id)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("id"), Bytes.toBytes(id));
}if (!StringUtils.isEmpty(username)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("username"), Bytes.toBytes(username));
}if (!StringUtils.isEmpty(mail)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("mail"), Bytes.toBytes(mail));
}if (!StringUtils.isEmpty(birthday) || !birthday.equals("0000-00-00")) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("birthday"), Bytes.toBytes(birthday));
}if (!StringUtils.isEmpty(mobile)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("mobile"), Bytes.toBytes(mobile));
}if (!StringUtils.isEmpty(phone)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("phone"), Bytes.toBytes(phone));
}if (!StringUtils.isEmpty(regtime)) {
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("modified"), Bytes.toBytes(regtime));
}
context.write(row, put);
}
}static int createJob(String[] args) throwsException {
inPath= args[0];
outPath= args[1];
tableName= args[2];
Connection connection=ConnectionFactory.createConnection(conf);
Table table=connection.getTable(TableName.valueOf(tableName));
Job job=Job.getInstance(conf);
job.setJarByClass(BulkLoadApp.class);
job.setMapperClass(BulkLoadMapper.class);
job.setNumReduceTasks(0);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
job.setOutputFormatClass(HFileOutputFormat2.class);
HFileOutputFormat2.configureIncrementalLoad(job, table, connection.getRegionLocator(TableName.valueOf(tableName)));
FileInputFormat.addInputPath(job,newPath(inPath));
FileOutputFormat.setOutputPath(job,newPath(outPath));return job.waitForCompletion(true)?0:1;
}/*** use commond:
* 1、hadoop jar MyJar INPUT_FILE OUTPUT_DIR TABLE_NAME
* hadoop jar bigdata.jar /tag/data/user/haier_user.csv /tag/data/user/haier_user_out tbl_shopuser
* 2、hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles OUTPUT_DIR TABLE_NAME
* hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles /tag/data/user/haier_user_out tbl_shopuser
*@paramargs
*@throwsException*/@SuppressWarnings("deprecation")public static void main(String[] args) throwsException {if (args.length!=3) {
System.out.println("Usage: "+BulkLoadApp.class.getName()+" Input paramters ");
}else{int status =createJob(args);if (status == 0) {
LoadIncrementalHFiles loadHFiles= newLoadIncrementalHFiles(conf);
loadHFiles.doBulkLoad(new Path(outPath), newHTable(conf, Bytes.toBytes(tableName)));
}
System.exit(status);
}
}
}