Environment setup
[root@quickstart ~]# export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/usr/lib/hbase/lib/*
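The export above puts the HBase client jars on the classpath of the hadoop command so the jobs below can reach HBase. On distributions that ship the hbase launcher script, an equivalent (offered here as a convenience, not from the original setup) is:
[root@quickstart ~]# export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$(hbase classpath)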
1. Use MapReduce to run a word count over one HBase table and write the results to another HBase table
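The job below scans the table mr, column family words, and splits each cell value on spaces, so that table must exist and hold some space-separated text before the job runs. A minimal hbase shell session to prepare it (the row key r1, qualifier line, and sample value are placeholders chosen for illustration):
hbase(main):001:0> create 'mr', 'words'
hbase(main):002:0> put 'mr', 'r1', 'words:line', 'hello world hello hbase'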
package com.cxy.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.hbase.client.ConnectionFactory;
public class MR {

    public static class MyMapper extends TableMapper<Text, IntWritable> {
        private static final IntWritable one = new IntWritable(1);
        private static final Text word = new Text();

        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            // Iterate over every cell in the row and tokenize its value on spaces
            for (Cell cell : value.listCells()) {
                String[] strs = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()).split(" ");
                for (String str : strs) {
                    word.set(str);
                    context.write(word, one);
                }
            }
        }
    }

    public static class MyReducer extends TableReducer<Text, IntWritable, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            // The Put constructor takes the row key
            Put put = new Put(Bytes.toBytes(key.toString()));
            // addColumn arguments: column family, qualifier, value
            put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(count)));
            context.write(NullWritable.get(), put);
        }
    }

    public static void createHBaseTable(String tableName) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        Connection conn = ConnectionFactory.createConnection(conf);
        // HTableDescriptor takes the table name
        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
        // Add the column family the reducer writes to
        desc.addFamily(new HColumnDescriptor("content"));
        Admin admin = conn.getAdmin();
        if (admin.tableExists(TableName.valueOf(tableName))) {
            System.out.println("Table exists!");
            System.exit(0);
        } else {
            admin.createTable(desc);
            System.out.println("Create table success");
        }
        admin.close();
        conn.close();
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String tableName = "output";
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        createHBaseTable(tableName);
        Job job = Job.getInstance(conf);
        job.setJarByClass(MR.class);
        // Use a Scan to control which data is read from HBase
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("words"));
        TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("mr"), scan, MyMapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob(tableName, MyReducer.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
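After packaging the class into a jar (the jar name mr-examples.jar is hypothetical), the job can be submitted with hadoop jar and the word counts inspected from the hbase shell:
[root@quickstart ~]# hadoop jar mr-examples.jar com.cxy.hbase.MR
hbase(main):001:0> scan 'output'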
2. Use MapReduce to run a word count over an HBase table and write the results to HDFS
package com.cxy.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MR2 {

    public static class MyMapper extends TableMapper<Text, IntWritable> {
        static Text text = new Text();
        static IntWritable one = new IntWritable(1);

        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            // Tokenize every cell value on spaces and emit (word, 1)
            for (Cell cell : value.listCells()) {
                String[] strs = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()).split(" ");
                for (String str : strs) {
                    text.set(str);
                    context.write(text, one);
                }
            }
        }
    }

    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        static IntWritable result = new IntWritable(0);

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            result.set(count);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        Job job = Job.getInstance(conf);
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("words"));
        job.setJarByClass(MR2.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("mr"), scan, MyMapper.class, Text.class, IntWritable.class, job);
        // The output directory must not already exist, or the job fails on startup
        FileOutputFormat.setOutputPath(job, new Path("/data/hbase/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
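Since this variant writes plain text to HDFS, the result lands in the usual part files under the output directory. Assuming the same hypothetical jar name, a run-and-check sketch:
[root@quickstart ~]# hadoop jar mr-examples.jar com.cxy.hbase.MR2
[root@quickstart ~]# hdfs dfs -cat /data/hbase/output/part-r-00000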
3. Use MapReduce to run a word count over data in HDFS and write the results to an HBase table
package com.cxy.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class MR3 {

    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        static Text text = new Text();
        static IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split each input line on spaces and emit (word, 1)
            String[] strs = value.toString().split(" ");
            for (String str : strs) {
                text.set(str);
                context.write(text, one);
            }
        }
    }

    public static class MyReducer extends TableReducer<Text, IntWritable, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(count)));
            context.write(NullWritable.get(), put);
        }
    }

    public static void createHBaseTable(String tableName) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        Connection conn = ConnectionFactory.createConnection(conf);
        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
        desc.addFamily(new HColumnDescriptor("content"));
        Admin admin = conn.getAdmin();
        if (admin.tableExists(TableName.valueOf(tableName))) {
            System.out.println("Table exists!");
            System.exit(0);
        } else {
            admin.createTable(desc);
            System.out.println("Create table success");
        }
        admin.close();
        conn.close();
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        // Create the HBase output table before submitting the job
        createHBaseTable("output2");
        Job job = Job.getInstance(conf);
        job.setJarByClass(MR3.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("/data/hbase/input/"));
        TableMapReduceUtil.initTableReducerJob("output2", MyReducer.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
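This last variant reads text files from /data/hbase/input/, so that directory needs at least one file before the job runs. A sketch, with the file name words.txt and its contents chosen here for illustration:
[root@quickstart ~]# hdfs dfs -mkdir -p /data/hbase/input
[root@quickstart ~]# echo 'hello world hello hbase' | hdfs dfs -put - /data/hbase/input/words.txt
[root@quickstart ~]# hadoop jar mr-examples.jar com.cxy.hbase.MR3
hbase(main):001:0> scan 'output2'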