1. Create the table 'wc' (with column family 'cf1') in HBase
create 'wc','cf1'
2. The wordcount job driver class
package zzw.cn;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
public class WordCountJob
{
    /**
     * Driver for the word-count MapReduce job: reads text files from HDFS
     * and writes per-word counts into the HBase table 'wc'.
     *
     * @param args optional; args[0] may override the default HDFS input path
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException
    {
        // Allow the input path to be supplied on the command line; fall back
        // to the original hard-coded location for backward compatibility.
        String inputPath = args.length > 0 ? args[0] : "hdfs://master.cn:8020/user/input/";

        // HBaseConfiguration.create() layers hbase-site.xml on top of the
        // regular Hadoop configuration.
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master.cn,slave1.cn,slave2.cn");

        Job job = Job.getInstance(conf);
        job.setJobName("word count");
        job.setJarByClass(WordCountJob.class);

        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(inputPath));

        // Wires up TableOutputFormat so the reducer writes Puts directly
        // into the 'wc' table, and ships the HBase dependency jars.
        TableMapReduceUtil.initTableReducerJob(
                "wc",                   // output table
                WordCountReduce.class,  // reducer class
                job);

        // was: Boolean (needless boxing). Also propagate failure via the
        // process exit code — the original exited 0 even when the job failed.
        boolean flag = job.waitForCompletion(true);
        if (flag)
        {
            System.out.println("job success");
        }
        System.exit(flag ? 0 : 1);
    }
}
3. The Mapper class
package zzw.cn;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
import java.io.IOException;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>
{
    // Reused output objects: allocating fresh Writables for every token is a
    // well-known MapReduce anti-pattern (object churn on the hot path).
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    /**
     * Tokenizes one line of input on spaces and emits (word, 1) per token.
     *
     * @param key   byte offset of the line within the input split (unused)
     * @param value one line of input text
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
    {
        for (String token : StringUtils.split(value.toString(), ' '))
        {
            // Skip empty tokens (e.g. from consecutive spaces); an empty word
            // would later become an illegal empty HBase row key in the reducer.
            if (token.isEmpty())
            {
                continue;
            }
            word.set(token);
            context.write(word, ONE);
        }
    }
}
4. The Reducer class
package zzw.cn;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;
public class WordCountReduce extends TableReducer<Text, IntWritable, ImmutableBytesWritable>
{
    // was: "cf1".getBytes() / "count".getBytes() — those use the platform
    // default charset. Bytes.toBytes is the HBase-idiomatic, UTF-8-safe
    // encoding; hoisting to constants also avoids re-encoding per key.
    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    private static final byte[] QUALIFIER = Bytes.toBytes("count");

    /**
     * Sums the counts for one word and emits a single Put:
     * row key = the word, cf1:count = total (stored as a decimal string).
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
    {
        int sum = 0;
        for (IntWritable value : values)
        {
            sum += value.get();
        }
        Put put = new Put(Bytes.toBytes(key.toString()));
        // Stored as a string (not Bytes.toBytes(int)) so `scan 'wc'` shows a
        // human-readable value — preserves the original behavior.
        put.addColumn(FAMILY, QUALIFIER, Bytes.toBytes(String.valueOf(sum)));
        // TableOutputFormat ignores the output key; the Put carries the row key.
        context.write(null, put);
    }
}
5.pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>zzw.cn</groupId>
<artifactId>hbase.mapreduce</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- Hadoop 2.6.0 (CDH 5.15) client-side dependencies -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0-cdh5.15.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0-cdh5.15.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0-cdh5.15.0</version>
</dependency>
<!-- HBase 1.2.0 (CDH 5.15); hbase-server provides the mapreduce
     integration classes (TableMapReduceUtil, TableReducer) -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>1.2.0-cdh5.15.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.0-cdh5.15.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.2.0-cdh5.15.0</version>
</dependency>
</dependencies>
<!-- CDH artifacts are not on Maven Central; pull from Cloudera's repo -->
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
</repository>
</repositories>
</project>
6. Package into a jar and run it on the Hadoop cluster
hadoop jar /opt/datas/HBaseMrDemo.jar
7. Query the results in the HBase table 'wc':
hbase(main):022:0> scan 'wc'
ROW COLUMN+CELL
hadoop column=cf1:count, timestamp=1543742364591, value=2
hbase column=cf1:count, timestamp=1543742364591, value=1
hello column=cf1:count, timestamp=1543742364591, value=2
name column=cf1:count, timestamp=1543742364591, value=3
world column=cf1:count, timestamp=1543742364591, value=1
zookeeper column=cf1:count, timestamp=1543742364591, value=1
6 row(s) in 0.0250 seconds