Hbase访问方式之Mapreduce

最新推荐文章于 2023-04-16 03:45:52 发布

行者无疆_super

最新推荐文章于 2023-04-16 03:45:52 发布

阅读量1.6w

点赞数

分类专栏： Hadoop hbase

本文链接：https://blog.csdn.net/woshiwanxin102213/article/details/17914083

版权

Hadoop 同时被 2 个专栏收录

14 篇文章

订阅专栏

hbase

6 篇文章

订阅专栏

本文介绍如何使用HBase与MapReduce API集成，通过一个简单的示例，从日志表中统计每个IP访问网站目录的总数。包括配置Job、定义Mapper和Reducer，以及执行MapReduce任务。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

概述：

HBase 对 MapReduce API 进行了扩展（如 TableMapper、TableReducer、TableMapReduceUtil），方便 MapReduce 任务读写 HBase 表（HTable）中的数据。

一个简单示例：

说明：从日志表 access-log 中，统计每个 IP 访问各个网站目录（URL）的总次数，并将结果写入 total-access 表。

package man.ludq.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class ExampleTotalMapReduce{
	public static void main(String[] args) {
		try{
			Configuration config = HBaseConfiguration.create();
			Job job = new Job(config,"ExampleSummary");
			job.setJarByClass(ExampleTotalMapReduce.class);     // class that contains mapper and reducer

			Scan scan = new Scan();
			scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
			scan.setCacheBlocks(false);  // don't set to true for MR jobs
			// set other scan attrs
			//scan.addColumn(family, qualifier);
			TableMapReduceUtil.initTableMapperJob(
					"access-log",        // input table
					scan,               // Scan instance to control CF and attribute selection
					MyMapper.class,     // mapper class
					Text.class,         // mapper output key
					IntWritable.class,  // mapper output value
					job);
			TableMapReduceUtil.initTableReducerJob(
					"total-access",        // output table
					MyTableReducer.class,    // reducer class
					job);
			job.setNumReduceTasks(1);   // at least one, adjust as required

			boolean b = job.waitForCompletion(true);
			if (!b) {
				throw new IOException("error with job!");
			} 
		} catch(Exception e){
			e.printStackTrace();
		}
	}

	public static class MyMapper extends TableMapper<Text, IntWritable>  {

		private final IntWritable ONE = new IntWritable(1);
		private Text text = new Text();

		public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
			String ip = Bytes.toString(row.get()).split("-")[0];
			String url = new String(value.getValue(Bytes.toBytes("info"), Bytes.toBytes("url")));
			text.set(ip+"&"+url);
			context.write(text, ONE);
		}
	}

	public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable>  {
		public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable val : values) {
				sum += val.get();
			}

			Put put = new Put(key.getBytes());
			put.add(Bytes.toBytes("info"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(sum)));

			context.write(null, put);
		}
	}
}