Environment setup
[root@quickstart ~]# export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/usr/lib/hbase/lib/*
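The export above puts the HBase client jars on the classpath of the hadoop command so the jobs below can reach HBase. On distributions that ship the hbase launcher script, an equivalent (offered here as a convenience, not from the original setup) is:
[root@quickstart ~]# export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$(hbase classpath)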
1. Use MapReduce to run a word count over one HBase table and write the results to another HBase table
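The job below scans the table mr, column family words, and splits each cell value on spaces, so that table must exist and hold some space-separated text before the job runs. A minimal hbase shell session to prepare it (the row key r1, qualifier line, and sample value are placeholders chosen for illustration):
hbase(main):001:0> create 'mr', 'words'
hbase(main):002:0> put 'mr', 'r1', 'words:line', 'hello world hello hbase'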
package com.cxy.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.hbase.client.ConnectionFactory;
public class MR {

    public static class MyMapper extends TableMapper<Text, IntWritable> {
        private static final IntWritable one = new IntWritable(1);
        private static final Text word = new Text();

        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            // Iterate over every cell in the row and tokenize its value on spaces
            for (Cell cell : value.listCells()) {
                String[] strs = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()).split(" ");
                for (String str : strs) {
                    word.set(str);
                    context.write(word, one);
                }
            }
        }
    }

    public static class MyReducer extends TableReducer<Text, IntWritable, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            // The Put constructor takes the row key
            Put put = new Put(Bytes.toBytes(key.toString()));
            // addColumn arguments: column family, qualifier, value
            put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(count)));
            context.write(NullWritable.get(), put);
        }
    }

    public static void createHBaseTable(String tableName) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        Connection conn = ConnectionFactory.createConnection(conf);
        // HTableDescriptor takes the table name
        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
        // Add the column family the reducer writes to
        desc.addFamily(new HColumnDescriptor("content"));
        Admin admin = conn.getAdmin();
        if (admin.tableExists(TableName.valueOf(tableName))) {
            System.out.println("Table exists!");
            System.exit(0);
        } else {
            admin.createTable(desc);
            System.out.println("Create table success");
        }
        admin.close();
        conn.close();
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String tableName = "output";
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        createHBaseTable(tableName);
        Job job = Job.getInstance(conf);
        job.setJarByClass(MR.class);
        // Use a Scan to control which data is read from HBase
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("words"));
        TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("mr"), scan, MyMapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob(tableName, MyReducer.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
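After packaging the class into a jar (the jar name mr-examples.jar is hypothetical), the job can be submitted with hadoop jar and the word counts inspected from the hbase shell:
[root@quickstart ~]# hadoop jar mr-examples.jar com.cxy.hbase.MR
hbase(main):001:0> scan 'output'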
2. Use MapReduce to run a word count over an HBase table and write the results to HDFS
package com.cxy.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MR2 {

    public static class MyMapper extends TableMapper<Text, IntWritable> {
        static Text text = new Text();
        static IntWritable one = new IntWritable(1);

        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            // Tokenize every cell value on spaces and emit (word, 1)
            for (Cell cell : value.listCells()) {
                String[] strs = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()).split(" ");
                for (String str : strs) {
                    text.set(str);
                    context.write(text, one);
                }
            }
        }
    }

    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        static IntWritable result = new IntWritable(0);

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            result.set(count);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        Job job = Job.getInstance(conf);
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("words"));
        job.setJarByClass(MR2.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("mr"), scan, MyMapper.class, Text.class, IntWritable.class, job);
        // The output directory must not already exist, or the job fails on startup
        FileOutputFormat.setOutputPath(job, new Path("/data/hbase/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
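Since this variant writes plain text to HDFS, the result lands in the usual part files under the output directory. Assuming the same hypothetical jar name, a run-and-check sketch:
[root@quickstart ~]# hadoop jar mr-examples.jar com.cxy.hbase.MR2
[root@quickstart ~]# hdfs dfs -cat /data/hbase/output/part-r-00000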
3. Use MapReduce to run a word count over data in HDFS and write the results to an HBase table
package com.cxy.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class MR3 {

    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        static Text text = new Text();
        static IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split each input line on spaces and emit (word, 1)
            String[] strs = value.toString().split(" ");
            for (String str : strs) {
                text.set(str);
                context.write(text, one);
            }
        }
    }

    public static class MyReducer extends TableReducer<Text, IntWritable, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(count)));
            context.write(NullWritable.get(), put);
        }
    }

    public static void createHBaseTable(String tableName) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        Connection conn = ConnectionFactory.createConnection(conf);
        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
        desc.addFamily(new HColumnDescriptor("content"));
        Admin admin = conn.getAdmin();
        if (admin.tableExists(TableName.valueOf(tableName))) {
            System.out.println("Table exists!");
            System.exit(0);
        } else {
            admin.createTable(desc);
            System.out.println("Create table success");
        }
        admin.close();
        conn.close();
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
        // Create the HBase output table before submitting the job
        createHBaseTable("output2");
        Job job = Job.getInstance(conf);
        job.setJarByClass(MR3.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("/data/hbase/input/"));
        TableMapReduceUtil.initTableReducerJob("output2", MyReducer.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
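This last variant reads text files from /data/hbase/input/, so that directory needs at least one file before the job runs. A sketch, with the file name words.txt and its contents chosen here for illustration:
[root@quickstart ~]# hdfs dfs -mkdir -p /data/hbase/input
[root@quickstart ~]# echo 'hello world hello hbase' | hdfs dfs -put - /data/hbase/input/words.txt
[root@quickstart ~]# hadoop jar mr-examples.jar com.cxy.hbase.MR3
hbase(main):001:0> scan 'output2'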