Delete HBase Rows Example
$ hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -DstartKey="10000:1365663164575:88888:testhome" -DstopKey="10000:1365663164575:88890:testhome" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net"
$ hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -Dappid="10000" -DstartTime="2010-01-01-01-01" -DstopTime="2014-01-01-01-01" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net"
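Both commands run the same job: the first passes explicit start and stop row keys, while the second passes an application id plus a time window from which the job derives the key boundaries. Row keys have the layout <appId>:<epochMillis>:<sequence>:<hostname>, and the time-based mode only needs the <appId>:<epochMillis> prefix. The sketch below mirrors what getRowKey() in the listing does; the class name and the values are illustrative, not part of the original source.

import java.text.SimpleDateFormat;

public class RowKeyPrefixDemo {
    public static void main(String[] args) throws Exception {
        // Same pattern as MRDeleteRows.getRowKey(): "yyyy-MM-dd-HH-mm" -> epoch millis.
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH-mm");
        long millis = df.parse("2010-01-01-01-01").getTime();
        // Only the <appId>:<epochMillis> prefix is needed as a scan boundary.
        System.out.println("10000" + ":" + millis); // e.g. 10000:1262307660000 (timezone dependent)
    }
}

The full source of MRDeleteRows follows.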
package com.citi.sponge.mapreduce;

import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MRDeleteRows extends Configured implements Tool {

    String startRowKey;
    String stopRowKey;
    String quorum;
    String table;
    String startTime;
    String stopTime;
    String appID;

    public String getStartTime() {
        return startTime;
    }

    public String getStopTime() {
        return stopTime;
    }

    public String getAppID() {
        return appID;
    }

    public String getQuorum() {
        return quorum;
    }

    public String getStartRowKey() {
        return startRowKey;
    }

    public String getStopRowKey() {
        return stopRowKey;
    }

    public String getTable() {
        return table;
    }
    @Override
    public int run(String[] arg0) throws Exception {
        // ToolRunner/GenericOptionsParser has already copied the -D options
        // (table, startKey, stopKey, quorum, appid, startTime, stopTime)
        // into the Configuration; pick them up here.
        Configuration conf = getConf();
        for (Entry<String, String> entry : conf) {
            if (entry.getKey().equals("startKey")) {
                this.startRowKey = entry.getValue();
            }
            if (entry.getKey().equals("stopKey")) {
                this.stopRowKey = entry.getValue();
            }
            if (entry.getKey().equals("quorum")) {
                this.quorum = entry.getValue();
            }
            if (entry.getKey().equals("table")) {
                this.table = entry.getValue();
            }
            if (entry.getKey().equals("startTime")) {
                this.startTime = entry.getValue();
            }
            if (entry.getKey().equals("stopTime")) {
                this.stopTime = entry.getValue();
            }
            if (entry.getKey().equals("appid")) {
                this.appID = entry.getValue();
            }
        }
        return 0;
    }
    static String getRowKey(String appID, String time) {
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH-mm");
        Date date = null;
        try {
            date = df.parse(time);
        } catch (ParseException e) {
            System.out.println("Please input the date in yyyy-MM-dd-HH-mm format");
            System.exit(1);
        }
        return appID + ":" + date.getTime();
    }
    static class DeleteMapper extends
            TableMapper<ImmutableBytesWritable, Delete> {

        public DeleteMapper() {
        }

        @Override
        public void map(ImmutableBytesWritable row, Result value,
                Context context) throws IOException {
            // Emit a Delete for every row the scan returns.
            ImmutableBytesWritable userKey = new ImmutableBytesWritable(row.get());
            try {
                Delete delete = new Delete(row.get());
                context.write(userKey, delete);
            } catch (InterruptedException e) {
                e.printStackTrace();
                throw new IOException(e);
            }
        }
    }
    public static void main(String[] args) throws Exception {
        MRDeleteRows deleteElf = new MRDeleteRows();
        ToolRunner.run(deleteElf, args);
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", deleteElf.getQuorum());
        Job job = new Job(config, "DeleteHbaseRowkeys");
        job.setJarByClass(MRDeleteRows.class);
        Scan scan = new Scan();
        System.out.println("quorum: " + deleteElf.getQuorum());
        System.out.println("table: " + deleteElf.getTable());
        // Restrict the scan either by an explicit key range or by an
        // appid/time range; if neither is supplied, the scan (and therefore
        // the delete) covers the whole table.
        if (deleteElf.getStartRowKey() != null && deleteElf.getStopRowKey() != null) {
            System.out.println("startkey: " + deleteElf.getStartRowKey());
            System.out.println("stopkey: " + deleteElf.getStopRowKey());
            scan.setStartRow(deleteElf.getStartRowKey().getBytes());
            scan.setStopRow(deleteElf.getStopRowKey().getBytes());
        }
        if (deleteElf.getAppID() != null && deleteElf.getStartTime() != null
                && deleteElf.getStopTime() != null) {
            System.out.println("AppID: " + deleteElf.getAppID());
            System.out.println("start time: " + deleteElf.getStartTime());
            System.out.println("stop time: " + deleteElf.getStopTime());
            scan.setStartRow(getRowKey(deleteElf.getAppID(), deleteElf.getStartTime()).getBytes());
            scan.setStopRow(getRowKey(deleteElf.getAppID(), deleteElf.getStopTime()).getBytes());
        }
        scan.setCacheBlocks(false);
        TableMapReduceUtil.initTableMapperJob(deleteElf.getTable(), scan,
                DeleteMapper.class, ImmutableBytesWritable.class, Delete.class,
                job);
        TableMapReduceUtil.initTableReducerJob(deleteElf.getTable(), null, job);
        boolean b = job.waitForCompletion(true);
        if (!b) {
            throw new IOException("error with job!");
        }
    }
}
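After the delete job completes, a quick way to verify is to scan the same key range with the plain HBase client and count what remains. This is a minimal sketch, not part of the original tooling; the table name, quorum, and key range are the ones used in the first example command, and the old 0.90-style client API is assumed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class CountRemainingRows {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        // Quorum, table, and key range below match the first example command.
        config.set("hbase.zookeeper.quorum",
                "vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net");
        HTable table = new HTable(config, "elf_log");
        Scan scan = new Scan(Bytes.toBytes("10000:1365663164575:88888:testhome"),
                Bytes.toBytes("10000:1365663164575:88890:testhome"));
        ResultScanner scanner = table.getScanner(scan);
        long count = 0;
        for (Result r : scanner) {
            count++;
        }
        scanner.close();
        table.close();
        System.out.println("rows left in range: " + count); // expect 0 after the delete job
    }
}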
HBase Loader MapReduce Example
import java.io.IOException;
import java.util.Calendar;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Sample Uploader MapReduce
 * <p>
 * This is EXAMPLE code. You will need to change it to work for your context.
 * <p>
 * Uses {@link TableReducer} to put the data into HBase. Change the InputFormat
 * to suit your data. In this example, every line of the input log file is
 * stored as the record body; the remaining column values (application id,
 * environment, hostname, log path, log file name, log type) are taken from the
 * command-line arguments.
 * <p>
 * The table and the column families written to (sysInfo and content) must
 * preexist.
 * <p>
 * There is no reducer in this example as it is not necessary and adds
 * significant overhead. If you need to do any massaging of data before
 * inserting into HBase, you can do this in the map as well.
 * <p>
 * Do the following to start the MR job:
 *
 * <pre>
 * ./bin/hadoop BulkLoaderToHbase <input> <tablename> <appId> <env> <hostname> <logpath> <logFileName> <logType>
 * </pre>
 * <p>
 * This code was written against HBase 0.21 trunk.
 *
 * Before running this job, make sure HADOOP_CLASSPATH is set; it needs to
 * include zookeeper.jar and hbase-0.90.4-cdh3u3.jar.
 */
public class BulkLoaderToHbase {

    private static final String NAME = "BulkLoaderToHbase";

    // Column family and qualifier names are fixed, so initialise them once here.
    private static final byte[] SYSINFO = Bytes.toBytes("sysInfo");
    private static final byte[] CONTENT = Bytes.toBytes("content");
    private static final byte[] APP_ID = Bytes.toBytes("appId");
    private static final byte[] ENV = Bytes.toBytes("env");
    private static final byte[] HOSTNAME = Bytes.toBytes("hostName");
    private static final byte[] BODY = Bytes.toBytes("body");
    private static final byte[] LOG_FILE_NAME = Bytes.toBytes("logFileName");
    private static final byte[] LOG_TYPE = Bytes.toBytes("logType");
    private static final byte[] LOG_FILE_PATH = Bytes.toBytes("logFilePath");
    static class Uploader extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        private long checkpoint = 100;
        private long count = 0;
        private long nano = 0;

        // Per-run metadata, read from the job configuration in setup().
        // Static fields assigned in the driver JVM are not visible inside the
        // map tasks, so the values have to travel through the Configuration.
        private byte[] appId_v;
        private byte[] env_v;
        private byte[] hostname_v;
        private byte[] logPath_v;
        private byte[] logFileName_v;
        private byte[] logType_v;

        @Override
        protected void setup(Context context) {
            Configuration conf = context.getConfiguration();
            appId_v = Bytes.toBytes(conf.get(NAME + ".appId"));
            env_v = Bytes.toBytes(conf.get(NAME + ".env"));
            hostname_v = Bytes.toBytes(conf.get(NAME + ".hostname"));
            logPath_v = Bytes.toBytes(conf.get(NAME + ".logPath"));
            logFileName_v = Bytes.toBytes(conf.get(NAME + ".logFileName"));
            logType_v = Bytes.toBytes(conf.get(NAME + ".logType"));
        }

        @Override
        public void map(LongWritable key, Text line, Context context)
                throws IOException {
            Calendar cal = Calendar.getInstance();
            // Row key layout: <appId>:<epochMillis>:<sequence>:<hostname>
            String rowkey = Bytes.toString(appId_v) + ":" + cal.getTimeInMillis()
                    + ":" + (nano++) + ":" + Bytes.toString(hostname_v);
            byte[] rowKeyValue = Bytes.toBytes(rowkey);
            Put put = new Put(rowKeyValue);
            put.add(SYSINFO, APP_ID, appId_v);
            put.add(SYSINFO, ENV, env_v);
            put.add(SYSINFO, HOSTNAME, hostname_v);
            // Text.getBytes() returns the backing array, which can be longer
            // than the valid data, so copy only the current contents.
            put.add(CONTENT, BODY, Bytes.toBytes(line.toString()));
            put.add(CONTENT, LOG_FILE_PATH, logPath_v);
            put.add(CONTENT, LOG_FILE_NAME, logFileName_v);
            put.add(CONTENT, LOG_TYPE, logType_v);
            // Uncomment below to disable WAL. This will improve performance but
            // means you will experience data loss in the case of a RegionServer
            // crash.
            // put.setWriteToWAL(false);
            try {
                context.write(new ImmutableBytesWritable(rowKeyValue), put);
            } catch (InterruptedException e) {
                e.printStackTrace();
                throw new IOException(e);
            }
            // Set status every checkpoint lines
            if (++count % checkpoint == 0) {
                context.setStatus("Emitting Put " + count);
            }
        }
    }
    /**
     * Job configuration.
     */
    public static Job configureJob(Configuration conf, String[] args)
            throws IOException {
        Path inputPath = new Path(args[0]);
        String tableName = args[1];
        // Pass the per-run metadata to the map tasks through the Configuration;
        // values assigned to driver-side statics would not reach the mappers.
        conf.set(NAME + ".appId", args[2]);
        conf.set(NAME + ".env", args[3]);
        conf.set(NAME + ".hostname", args[4]);
        conf.set(NAME + ".logPath", args[5]);
        conf.set(NAME + ".logFileName", args[6]);
        conf.set(NAME + ".logType", args[7]);
        Job job = new Job(conf, NAME + "_" + tableName);
        job.setJarByClass(Uploader.class);
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(Uploader.class);
        // No reducers. Just write straight to table. Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
        return job;
    }
    /**
     * Main entry point.
     *
     * @param args
     *            The command line parameters.
     * @throws Exception
     *             When running the job fails.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        if (otherArgs.length != 8) {
            System.err.println("Wrong number of arguments: " + otherArgs.length);
            System.err.println("Usage: " + NAME
                    + " <input> <tablename> <appId> <env> <hostname> <logpath> <logFileName> <logType>");
            System.exit(-1);
        }
        Job job = configureJob(conf, otherArgs);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
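Assuming the loader is packaged in the same jar as the delete example (the jar name, input path, and argument values below are illustrative, not taken from a real run), the job is started the same way, with the eight arguments in the order shown in the usage message:

$ hadoop jar ./sponge-hserver.jar BulkLoaderToHbase /tmp/input.log elf_log 10000 UAT testhome /tmp/logs app.log elf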