Delete HBase Rows Example
$ hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -DstartKey="10000:1365663164575:88888:testhome" -DstopKey="10000:1365663164575:88890:testhome" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net"
$ hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -Dappid="10000" -DstartTime="2010-01-01-01-01" -DstopTime="2014-01-01-01-01" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net"
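Both commands run the same job: the first passes explicit start and stop row keys, while the second passes an application id plus a time window from which the job derives the key boundaries. Row keys have the layout <appId>:<epochMillis>:<sequence>:<hostname>, and the time-based mode only needs the <appId>:<epochMillis> prefix. The sketch below mirrors what getRowKey() in the listing does; the class name and the values are illustrative, not part of the original source.

import java.text.SimpleDateFormat;

public class RowKeyPrefixDemo {
    public static void main(String[] args) throws Exception {
        // Same pattern as MRDeleteRows.getRowKey(): "yyyy-MM-dd-HH-mm" -> epoch millis.
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH-mm");
        long millis = df.parse("2010-01-01-01-01").getTime();
        // Only the <appId>:<epochMillis> prefix is needed as a scan boundary.
        System.out.println("10000" + ":" + millis); // e.g. 10000:1262307660000 (timezone dependent)
    }
}

The full source of MRDeleteRows follows.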
package com.citi.sponge.mapreduce;

import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MRDeleteRows extends Configured implements Tool {

    String startRowKey;
    String stopRowKey;
    String quorum;
    String table;
    String startTime;
    String stopTime;
    String appID;

    public String getStartTime() {
        return startTime;
    }

    public String getStopTime() {
        return stopTime;
    }

    public String getAppID() {
        return appID;
    }

    public String getQuorum() {
        return quorum;
    }

    public String getStartRowKey() {
        return startRowKey;
    }

    public String getStopRowKey() {
        return stopRowKey;
    }

    public String getTable() {
        return table;
    }
    @Override
    public int run(String[] arg0) throws Exception {
        // ToolRunner/GenericOptionsParser has already copied the -D options
        // (table, startKey, stopKey, quorum, appid, startTime, stopTime)
        // into the Configuration; pick them up here.
        Configuration conf = getConf();
        for (Entry<String, String> entry : conf) {
            if (entry.getKey().equals("startKey")) {
                this.startRowKey = entry.getValue();
            }
            if (entry.getKey().equals("stopKey")) {
                this.stopRowKey = entry.getValue();
            }
            if (entry.getKey().equals("quorum")) {
                this.quorum = entry.getValue();
            }
            if (entry.getKey().equals("table")) {
                this.table = entry.getValue();
            }
            if (entry.getKey().equals("startTime")) {
                this.startTime = entry.getValue();
            }
            if (entry.getKey().equals("stopTime")) {
                this.stopTime = entry.getValue();
            }
            if (entry.getKey().equals("appid")) {
                this.appID = entry.getValue();
            }
        }
        return 0;
    }
    static String getRowKey(String appID, String time) {
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH-mm");
        Date date = null;
        try {
            date = df.parse(time);
        } catch (ParseException e) {
            System.out.println("Please input the date in yyyy-MM-dd-HH-mm format");
            System.exit(1);
        }
        return appID + ":" + date.getTime();
    }
    static class DeleteMapper extends
            TableMapper<ImmutableBytesWritable, Delete> {

        public DeleteMapper() {
        }

        @Override
        public void map(ImmutableBytesWritable row, Result value,
                Context context) throws IOException {
            // Emit a Delete for every row the scan returns.
            ImmutableBytesWritable userKey = new ImmutableBytesWritable(row.get());
            try {
                Delete delete = new Delete(row.get());
                context.write(userKey, delete);
            } catch (InterruptedException e) {
                e.printStackTrace();
                throw new IOException(e);
            }
        }
    }
    public static void main(String[] args) throws Exception {
        MRDeleteRows deleteElf = new MRDeleteRows();
        ToolRunner.run(deleteElf, args);
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", deleteElf.getQuorum());
        Job job = new Job(config, "DeleteHbaseRowkeys");
        job.setJarByClass(MRDeleteRows.class);
        Scan scan = new Scan();
        System.out.println("quorum: " + deleteElf.getQuorum());
        System.out.println("table: " + deleteElf.getTable());
        // Restrict the scan either by an explicit key range or by an
        // appid/time range; if neither is supplied, the scan (and therefore
        // the delete) covers the whole table.
        if (deleteElf.getStartRowKey() != null && deleteElf.getStopRowKey() != null) {
            System.out.println("startkey: " + deleteElf.getStartRowKey());
            System.out.println("stopkey: " + deleteElf.getStopRowKey());
            scan.setStartRow(deleteElf.getStartRowKey().getBytes());
            scan.setStopRow(deleteElf.getStopRowKey().getBytes());
        }
        if (deleteElf.getAppID() != null && deleteElf.getStartTime() != null
                && deleteElf.getStopTime() != null) {
            System.out.println("AppID: " + deleteElf.getAppID());
            System.out.println("start time: " + deleteElf.getStartTime());
            System.out.println("stop time: " + deleteElf.getStopTime());
            scan.setStartRow(getRowKey(deleteElf.getAppID(), deleteElf.getStartTime()).getBytes());
            scan.setStopRow(getRowKey(deleteElf.getAppID(), deleteElf.getStopTime()).getBytes());
        }
        scan.setCacheBlocks(false);
        TableMapReduceUtil.initTableMapperJob(deleteElf.getTable(), scan,
                DeleteMapper.class, ImmutableBytesWritable.class, Delete.class,
                job);
        TableMapReduceUtil.initTableReducerJob(deleteElf.getTable(), null, job);
        boolean b = job.waitForCompletion(true);
        if (!b) {
            throw new IOException("error with job!");
        }
    }
}
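After the delete job completes, a quick way to verify is to scan the same key range with the plain HBase client and count what remains. This is a minimal sketch, not part of the original tooling; the table name, quorum, and key range are the ones used in the first example command, and the old 0.90-style client API is assumed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class CountRemainingRows {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        // Quorum, table, and key range below match the first example command.
        config.set("hbase.zookeeper.quorum",
                "vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net");
        HTable table = new HTable(config, "elf_log");
        Scan scan = new Scan(Bytes.toBytes("10000:1365663164575:88888:testhome"),
                Bytes.toBytes("10000:1365663164575:88890:testhome"));
        ResultScanner scanner = table.getScanner(scan);
        long count = 0;
        for (Result r : scanner) {
            count++;
        }
        scanner.close();
        table.close();
        System.out.println("rows left in range: " + count); // expect 0 after the delete job
    }
}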
HBase Loader MapReduce Example
import java.io.IOException;
import java.util.Calendar;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Sample Uploader MapReduce
 * <p>
 * This is EXAMPLE code. You will need to change it to work for your context.
 * <p>
 * Uses {@link TableReducer} to put the data into HBase. Change the InputFormat
 * to suit your data. In this example, every line of the input log file is
 * stored as the record body; the remaining column values (application id,
 * environment, hostname, log path, log file name, log type) are taken from the
 * command-line arguments.
 * <p>
 * The table and the column families written to (sysInfo and content) must
 * preexist.
 * <p>
 * There is no reducer in this example as it is not necessary and adds
 * significant overhead. If you need to do any massaging of data before
 * inserting into HBase, you can do this in the map as well.
 * <p>
 * Do the following to start the MR job:
 *
 * <pre>
 * ./bin/hadoop BulkLoaderToHbase <input> <tablename> <appId> <env> <hostname> <logpath> <logFileName> <logType>
 * </pre>
 * <p>
 * This code was written against HBase 0.21 trunk.
 *
 * Before running this job, make sure HADOOP_CLASSPATH is set; it needs to
 * include zookeeper.jar and hbase-0.90.4-cdh3u3.jar.
 */
public class BulkLoaderToHbase {

    private static final String NAME = "BulkLoaderToHbase";

    // Column family and qualifier names are fixed, so initialise them once here.
    private static final byte[] SYSINFO = Bytes.toBytes("sysInfo");
    private static final byte[] CONTENT = Bytes.toBytes("content");
    private static final byte[] APP_ID = Bytes.toBytes("appId");
    private static final byte[] ENV = Bytes.toBytes("env");
    private static final byte[] HOSTNAME = Bytes.toBytes("hostName");
    private static final byte[] BODY = Bytes.toBytes("body");
    private static final byte[] LOG_FILE_NAME = Bytes.toBytes("logFileName");
    private static final byte[] LOG_TYPE = Bytes.toBytes("logType");
    private static final byte[] LOG_FILE_PATH = Bytes.toBytes("logFilePath");
    static class Uploader extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        private long checkpoint = 100;
        private long count = 0;
        private long nano = 0;

        // Per-run metadata, read from the job configuration in setup().
        // Static fields assigned in the driver JVM are not visible inside the
        // map tasks, so the values have to travel through the Configuration.
        private byte[] appId_v;
        private byte[] env_v;
        private byte[] hostname_v;
        private byte[] logPath_v;
        private byte[] logFileName_v;
        private byte[] logType_v;

        @Override
        protected void setup(Context context) {
            Configuration conf = context.getConfiguration();
            appId_v = Bytes.toBytes(conf.get(NAME + ".appId"));
            env_v = Bytes.toBytes(conf.get(NAME + ".env"));
            hostname_v = Bytes.toBytes(conf.get(NAME + ".hostname"));
            logPath_v = Bytes.toBytes(conf.get(NAME + ".logPath"));
            logFileName_v = Bytes.toBytes(conf.get(NAME + ".logFileName"));
            logType_v = Bytes.toBytes(conf.get(NAME + ".logType"));
        }

        @Override
        public void map(LongWritable key, Text line, Context context)
                throws IOException {
            Calendar cal = Calendar.getInstance();
            // Row key layout: <appId>:<epochMillis>:<sequence>:<hostname>
            String rowkey = Bytes.toString(appId_v) + ":" + cal.getTimeInMillis()
                    + ":" + (nano++) + ":" + Bytes.toString(hostname_v);
            byte[] rowKeyValue = Bytes.toBytes(rowkey);
            Put put = new Put(rowKeyValue);
            put.add(SYSINFO, APP_ID, appId_v);
            put.add(SYSINFO, ENV, env_v);
            put.add(SYSINFO, HOSTNAME, hostname_v);
            // Text.getBytes() returns the backing array, which can be longer
            // than the valid data, so copy only the current contents.
            put.add(CONTENT, BODY, Bytes.toBytes(line.toString()));
            put.add(CONTENT, LOG_FILE_PATH, logPath_v);
            put.add(CONTENT, LOG_FILE_NAME, logFileName_v);
            put.add(CONTENT, LOG_TYPE, logType_v);
            // Uncomment below to disable WAL. This will improve performance but
            // means you will experience data loss in the case of a RegionServer
            // crash.
            // put.setWriteToWAL(false);
            try {
                context.write(new ImmutableBytesWritable(rowKeyValue), put);
            } catch (InterruptedException e) {
                e.printStackTrace();
                throw new IOException(e);
            }
            // Set status every checkpoint lines
            if (++count % checkpoint == 0) {
                context.setStatus("Emitting Put " + count);
            }
        }
    }
    /**
     * Job configuration.
     */
    public static Job configureJob(Configuration conf, String[] args)
            throws IOException {
        Path inputPath = new Path(args[0]);
        String tableName = args[1];
        // Pass the per-run metadata to the map tasks through the Configuration;
        // values assigned to driver-side statics would not reach the mappers.
        conf.set(NAME + ".appId", args[2]);
        conf.set(NAME + ".env", args[3]);
        conf.set(NAME + ".hostname", args[4]);
        conf.set(NAME + ".logPath", args[5]);
        conf.set(NAME + ".logFileName", args[6]);
        conf.set(NAME + ".logType", args[7]);
        Job job = new Job(conf, NAME + "_" + tableName);
        job.setJarByClass(Uploader.class);
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(Uploader.class);
        // No reducers. Just write straight to table. Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
        return job;
    }
    /**
     * Main entry point.
     *
     * @param args
     *            The command line parameters.
     * @throws Exception
     *             When running the job fails.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        if (otherArgs.length != 8) {
            System.err.println("Wrong number of arguments: " + otherArgs.length);
            System.err.println("Usage: " + NAME
                    + " <input> <tablename> <appId> <env> <hostname> <logpath> <logFileName> <logType>");
            System.exit(-1);
        }
        Job job = configureJob(conf, otherArgs);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
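Assuming the loader is packaged in the same jar as the delete example (the jar name, input path, and argument values below are illustrative, not taken from a real run), the job is started the same way, with the eight arguments in the order shown in the usage message:

$ hadoop jar ./sponge-hserver.jar BulkLoaderToHbase /tmp/input.log elf_log 10000 UAT testhome /tmp/logs app.log elf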