1.BulkLoad
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class BulkLoad {
    @SuppressWarnings({ "deprecation" })
    public static void main(String[] args) throws Exception {
        Configuration config = new Configuration();
        String[] dfsArgs = new GenericOptionsParser(config, args).getRemainingArgs();
        if (dfsArgs.length != 3) {
            System.out.println("usage: <input> <output> <table>");
            System.exit(-1);
        }
        String inputDir = dfsArgs[0];
        String outputDir = dfsArgs[1];
        String tableName = dfsArgs[2];
        System.out.println("running cmd: input<" + inputDir + "> output<" + outputDir + "> tableName<" + tableName + ">");

        Job HFileJob = new Job(config, "ufug");
        HFileJob.setJarByClass(BulkLoad.class);
        HFileJob.setMapperClass(BulkLoadMap.class);
        // The HFile writer expects ImmutableBytesWritable/KeyValue pairs from the mapper.
        HFileJob.setMapOutputKeyClass(ImmutableBytesWritable.class);
        HFileJob.setMapOutputValueClass(KeyValue.class);
        FileInputFormat.setInputPaths(HFileJob, inputDir);
        FileOutputFormat.setOutputPath(HFileJob, new Path(outputDir));

        // configureIncrementalLoad wires up the partitioner and sort reducer so the
        // generated HFiles line up with the region boundaries of the target table.
        Configuration hbaseConfiguration = HBaseConfiguration.create();
        HTable wordCountTable = new HTable(hbaseConfiguration, tableName);
        HFileOutputFormat2.configureIncrementalLoad(HFileJob, wordCountTable);

        int HFileJobResult = HFileJob.waitForCompletion(true) ? 0 : 1;
        if (HFileJobResult != 0) {
            System.out.println("HFile job failed");
            System.exit(HFileJobResult);
        }
        // Move the generated HFiles into the table's regions.
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(hbaseConfiguration);
        loader.doBulkLoad(new Path(outputDir), wordCountTable);
        System.out.println("SUCCESS");
        System.exit(HFileJobResult);
    }
}
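configureIncrementalLoad needs the target table to already exist, since the HFiles are partitioned along its region boundaries. The original post does not show how the table is created; the sketch below is one possible way to pre-create it with the same old-style client API, assuming the column families used in BulkLoadMap ("cf", "caeft", and the index family). Class name and family list are assumptions, not part of the original.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;

// Sketch only (not part of the original post): pre-create the bulk-load target table.
public class CreateBulkLoadTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        TableName tableName = TableName.valueOf(args[0]);
        HTableDescriptor desc = new HTableDescriptor(tableName);
        desc.addFamily(new HColumnDescriptor("cf"));     // main data family written by BulkLoadMap
        desc.addFamily(new HColumnDescriptor("caeft"));  // alternate family written by BulkLoadMap
        desc.addFamily(new HColumnDescriptor("ewewf"));  // index family written by BulkLoadMap (assumed)
        if (!admin.tableExists(tableName)) {
            admin.createTable(desc);
        }
        admin.close();
    }
}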
2.BulkLoadMap
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class BulkLoadMap extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {

    // Counters used to track parse results for each input record.
    public static enum Counter {
        FIELD_ERROR, INFOID_DIGITAL_ERROR, INFOID_SUCCESS
    }

    // Family/qualifier for the secondary index rows written at the end of map().
    // The literal values are placeholders carried over from the original post.
    private static String indexFamily = "ewewf";
    private static String indexFamilyColumn = "asdasd";
    // Column qualifiers, in the same order as the fields of an input record
    // (names are placeholders carried over from the original post).
    private static String[] infoColumes = new String[] { "a", "b",
            "c", "d", "e", "fl", "g", "h",
            "i" };

    private static boolean checkNumber(String str) {
        char[] charArray = str.toCharArray();
        for (int i = 0; i < charArray.length; i++) {
            if (!Character.isDigit(charArray[i])) {
                return false;
            }
        }
        return true;
    }

    private long time = -1;
    public static String FORMAT_STR = "yyyy.MM.dd:HH:mm:ss";

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Optional fallback timestamp, passed in as -Dbulkload.date=...
        String date = context.getConfiguration().get("bulkload.date");
        if (date == null || date.equals(""))
            return;
        SimpleDateFormat format = new SimpleDateFormat(FORMAT_STR);
        try {
            time = format.parse(date).getTime();
        } catch (Exception e) {
            // keep the default (-1) if the configured date cannot be parsed
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String infoString = value.toString();
        // The field delimiter is garbled in the original post; a literal backslash
        // is the closest reading of the escape sequence used there.
        String[] info = infoString.split("\\\\");
        String timeSta = null;
        String infoId = null;
        String userId = null;
        String cateId = null;
        String state = null;
        try {
            if (info.length != 7 && info.length != 8) {
                if (infoString != null && !infoString.equals("")) {
                    System.out.println("Wrong info!" + value.toString());
                    context.getCounter(Counter.FIELD_ERROR).increment(1);
                }
                return;
            }
            // Field 0 is the record timestamp; fall back to the configured date if it does not parse.
            timeSta = info[0];
            long putTime = -1;
            try {
                // MD5 is a helper class from the original project (not shown in the post);
                // get_op_format() is assumed to return the SimpleDateFormat of field 0.
                Date date = MD5.get_op_format().parse(timeSta);
                putTime = date.getTime();
            } catch (Exception e) {
                if (time > 0)
                    putTime = time;
            }
            // Field 1 is the record id and must be numeric.
            infoId = info[1];
            if (!checkNumber(infoId)) {
                context.getCounter(Counter.INFOID_DIGITAL_ERROR).increment(1);
                return;
            }
            // Row key is the MD5 digest of the record id (MD5.MD5 from the helper class).
            byte[] rowKey = Bytes.toBytes(MD5.MD5(infoId));
            ImmutableBytesWritable rowKeyWritable = new ImmutableBytesWritable(rowKey);
            byte[] family = Bytes.toBytes("cf");
            // Records may have 7 or 8 fields, so only write the columns actually present.
            for (int i = 0; i < infoColumes.length && i + 1 < info.length; i++) {
                family = Bytes.toBytes("cf");
                byte[] qualifier = Bytes.toBytes(infoColumes[i]);
                byte[] hbaseValue = Bytes.toBytes(info[i + 1]);
                // Per-column special cases; the qualifier names below are
                // placeholders carried over from the original post.
                if (infoColumes[i].equalsIgnoreCase("SAs")) {
                    hbaseValue = Bytes.toBytes(infoId);
                }
                if (infoColumes[i].equalsIgnoreCase("WDQ")
                        || infoColumes[i].equalsIgnoreCase("DWQ")
                        || infoColumes[i].equalsIgnoreCase("QDWWD")) {
                    String newValue = info[i + 1].toLowerCase();
                    hbaseValue = Bytes.toBytes(newValue);
                }
                if (infoColumes[i].equalsIgnoreCase("afeat")) {
                    family = Bytes.toBytes("caeft");
                }
                if (infoColumes[i].equalsIgnoreCase("aefD")) {
                    userId = info[i + 1];
                }
                if (infoColumes[i].equalsIgnoreCase("afa")) {
                    cateId = info[i + 1];
                }
                if (infoColumes[i].equalsIgnoreCase("aefe")) {
                    state = info[i + 1];
                }
                // Build the KeyValue with the parsed timestamp when one is available.
                KeyValue keyValue;
                if (putTime > 0) {
                    keyValue = new KeyValue(rowKey, family, qualifier, putTime, hbaseValue);
                } else {
                    keyValue = new KeyValue(rowKey, family, qualifier, hbaseValue);
                }
                context.write(rowKeyWritable, keyValue);
            }
            // Secondary index row: MD5(userId)_cateId_infoId -> state.
            if (userId != null && cateId != null && state != null) {
                String indexRowKey = MD5.MD5(userId) + "_" + cateId + "_" + infoId;
                ImmutableBytesWritable indexRowKeyWritable = new ImmutableBytesWritable(Bytes.toBytes(indexRowKey));
                byte[] stateValue = Bytes.toBytes(state);
                KeyValue keyValue;
                if (putTime > 0) {
                    keyValue = new KeyValue(Bytes.toBytes(indexRowKey), Bytes.toBytes(indexFamily),
                            Bytes.toBytes(indexFamilyColumn), putTime, stateValue);
                } else {
                    keyValue = new KeyValue(Bytes.toBytes(indexRowKey), Bytes.toBytes(indexFamily),
                            Bytes.toBytes(indexFamilyColumn), stateValue);
                }
                context.write(indexRowKeyWritable, keyValue);
            }
            context.getCounter(Counter.INFOID_SUCCESS).increment(1);
        } catch (Exception e) {
            System.out.println("InfoID: " + infoId + " value: " + value.toString());
            e.printStackTrace();
        }
    }
}
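BulkLoadMap depends on an MD5 helper class that is not included in the post: MD5.MD5(String) produces the row-key digest and get_op_format() supplies the format used to parse the timestamp field. A minimal sketch of what it might look like; the date pattern and hex-digest behavior are assumptions, not the original implementation.
import java.security.MessageDigest;
import java.text.SimpleDateFormat;

// Sketch only (not part of the original post): stand-in for the MD5 helper used by BulkLoadMap.
public class MD5 {
    // Format of the timestamp field; the actual pattern is an assumption.
    public static SimpleDateFormat get_op_format() {
        return new SimpleDateFormat("yyyy.MM.dd:HH:mm:ss");
    }

    // Hex-encoded MD5 digest of the input string.
    public static String MD5(String input) {
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            byte[] digest = md.digest(input.getBytes("UTF-8"));
            StringBuilder sb = new StringBuilder();
            for (byte b : digest) {
                sb.append(String.format("%02x", b));
            }
            return sb.toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}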