HBase API
1. Environment Setup
After creating a new Maven project, add the following dependencies to pom.xml:
<dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>1.3.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-server -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>1.3.1</version>
    </dependency>
</dependencies>
2. HBase API
2.1 Obtaining the Configuration Object
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceExistException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * @Date 2021/1/25 20:40
 * @Version 10.21
 * @Author DuanChaojie
 */
public class TestAPI {
    private static Connection connection = null;
    private static Admin admin = null;

    static {
        try {
            // Old API
            //HBaseConfiguration configuration = new HBaseConfiguration();
            // New API
            // 1. Load the configuration
            Configuration configuration = HBaseConfiguration.create();
            configuration.set("hbase.zookeeper.quorum", "hadoop,hadoop101,hadoop102");
            configuration.set("hbase.zookeeper.property.clientPort", "2181");
            // 2. Get the connection and the Admin object
            connection = ConnectionFactory.createConnection(configuration);
            //admin = new HBaseAdmin(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Release resources
     */
    public static void close() {
        if (admin != null) {
            try {
                admin.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
2.2 Checking Whether a Table Exists
/**
 * 1. Check whether a table exists: admin.tableExists()
 * @param tableName
 * @return
 * @throws IOException
 */
public static boolean isTableExist(String tableName) throws IOException {
    boolean exist = admin.tableExists(TableName.valueOf(tableName));
    return exist;
}
2.3 Creating a Table
/**
 * 2. Create a table
 * @param tableName
 * @param cfs column families
 */
public static void createTable(String tableName, String... cfs) throws IOException {
    // 1. Make sure at least one column family was supplied
    if (cfs.length <= 0) {
        System.out.println("Please provide at least one column family!");
        return;
    }
    // 2. Check whether the table already exists
    if (isTableExist(tableName)) {
        System.out.println("Table " + tableName + " already exists!");
        return;
    }
    // 3. Create the table descriptor
    HTableDescriptor hTableDescriptor = new HTableDescriptor(TableName.valueOf(tableName));
    // 4. Loop over the column families
    for (String cf : cfs) {
        // 5. Create a column family descriptor
        HColumnDescriptor hColumnDescriptor = new HColumnDescriptor(cf);
        // 6. Add the column family to the table descriptor
        hTableDescriptor.addFamily(hColumnDescriptor);
    }
    // 7. Create the table
    admin.createTable(hTableDescriptor);
}
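By default a column family created this way keeps only one version of each cell, so the get.setMaxVersions(5) call shown later in section 2.7 would still return a single version. A minimal sketch of how the loop above could be adjusted to keep multiple versions; the value 3 is an illustrative assumption, not something from the original setup:
for (String cf : cfs) {
    HColumnDescriptor hColumnDescriptor = new HColumnDescriptor(cf);
    // Keep up to 3 versions of each cell (assumed value, tune as needed)
    hColumnDescriptor.setMaxVersions(3);
    hTableDescriptor.addFamily(hColumnDescriptor);
}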
2.4 Deleting a Table
/**
 * 3. Delete a table
 * @param tableName
 */
public static void dropTable(String tableName) throws IOException {
    // 1. Check whether the table exists
    if (!isTableExist(tableName)) {
        System.out.println("Table " + tableName + " does not exist!");
        return;
    }
    // 2. Disable the table first
    admin.disableTable(TableName.valueOf(tableName));
    // 3. Delete the table
    admin.deleteTable(TableName.valueOf(tableName));
}
2.5 Creating a Namespace
/**
 * 4. Create a namespace
 * @param ns
 */
public static void createNameSpace(String ns) {
    // 1. Create the namespace descriptor
    NamespaceDescriptor namespaceDescriptor = NamespaceDescriptor.create(ns).build();
    // 2. Create the namespace
    try {
        admin.createNamespace(namespaceDescriptor);
    } catch (NamespaceExistException e) {
        System.out.println("Namespace " + ns + " already exists!");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
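Once the namespace exists, a table is placed inside it by qualifying the table name as namespace:table; the createTable method from section 2.3 can be reused unchanged. A short usage sketch (the names mmdd, user, info1 and info2 simply mirror the examples used elsewhere in this post):
// Create the namespace, then a table inside it using the "namespace:table" form
createNameSpace("mmdd");
createTable("mmdd:user", "info1", "info2");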
2.6 Inserting Data (Put)
/**
 * Insert data into a table
 * @param tableName
 * @param rowKey
 * @param cf column family
 * @param cn column qualifier
 * @param value
 */
public static void putData(String tableName, String rowKey, String cf, String cn, String value) throws IOException {
    // 1. Get the table object
    Table table = connection.getTable(TableName.valueOf(tableName));
    // 2. Build the Put object with the Bytes utility class
    Put put = new Put(Bytes.toBytes(rowKey));
    // 3. Fill in the Put object
    put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(cn), Bytes.toBytes(value));
    // 4. Insert the data
    table.put(put);
    // 5. Release resources
    table.close();
}
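When many rows have to be written, calling table.put(Put) once per row means one RPC per row. Table also accepts a List of Puts, which sends the whole batch in a single call. A minimal sketch reusing the connection above; the helper name putBatch and the sample rows are made up for illustration, and java.util.List / java.util.ArrayList must be imported:
public static void putBatch(String tableName, String cf, String cn) throws IOException {
    Table table = connection.getTable(TableName.valueOf(tableName));
    List<Put> puts = new ArrayList<Put>();
    for (int i = 1; i <= 100; i++) {
        Put put = new Put(Bytes.toBytes("row" + i));
        put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(cn), Bytes.toBytes("value" + i));
        puts.add(put);
    }
    // One batched call instead of 100 separate RPCs
    table.put(puts);
    table.close();
}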
2.7 Getting Data (Get)
/**
 * Get data with Get
 * @param tableName
 * @param rowkey
 * @param cf column family
 * @param cn column qualifier
 */
public static void getData(String tableName, String rowkey, String cf, String cn) throws IOException {
    // 1. Get the table object
    Table table = connection.getTable(TableName.valueOf(tableName));
    // 2. Create the Get object
    Get get = new Get(Bytes.toBytes(rowkey));
    // 2.1 Restrict to a column family
    //get.addFamily(Bytes.toBytes(cf));
    // 2.2 Restrict to a column family and column
    get.addColumn(Bytes.toBytes(cf), Bytes.toBytes(cn));
    // 2.3 Set the number of versions to return
    get.setMaxVersions(5);
    // 3. Fetch the data
    Result result = table.get(get);
    // 4. Parse the Result and print it
    Cell[] cells = result.rawCells();
    for (Cell cell : cells) {
        // 5. Print each cell
        String CF = Bytes.toString(CellUtil.cloneFamily(cell));
        String CN = Bytes.toString(CellUtil.cloneQualifier(cell));
        String value = Bytes.toString(CellUtil.cloneValue(cell));
        System.out.println("CF = " + CF);
        System.out.println("CN = " + CN);
        System.out.println("value = " + value);
    }
    // 6. Release resources
    table.close();
}
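If only a single, known column is needed, iterating over rawCells() is not necessary: Result.getValue(family, qualifier) returns the latest value of that column directly. A minimal sketch under the same assumptions as getData above:
Result result = table.get(new Get(Bytes.toBytes(rowkey)));
// Latest value of cf:cn, or null if the column does not exist for this row
byte[] raw = result.getValue(Bytes.toBytes(cf), Bytes.toBytes(cn));
String value = raw == null ? null : Bytes.toString(raw);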
2.8 Getting Data (Scan)
/**
 * Get data with Scan
 * @param tableName
 */
public static void scanTable(String tableName) throws IOException {
    // 1. Get the table object
    Table table = connection.getTable(TableName.valueOf(tableName));
    // 2. Build the Scan object (start row inclusive, stop row exclusive)
    //Scan scan = new Scan();
    Scan scan = new Scan(Bytes.toBytes("1001"), Bytes.toBytes("1003"));
    // 3. Scan the table
    ResultScanner resultScanner = table.getScanner(scan);
    // 4. Iterate over the ResultScanner
    for (Result result : resultScanner) {
        // 5. Parse each Result and print it
        for (Cell cell : result.rawCells()) {
            String CF = Bytes.toString(CellUtil.cloneFamily(cell));
            String CN = Bytes.toString(CellUtil.cloneQualifier(cell));
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.println("CF = " + CF);
            System.out.println("CN = " + CN);
            System.out.println("value = " + value);
        }
    }
    // 6. Release resources
    resultScanner.close();
    table.close();
}
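A bare full-table scan returns every column of every row and fetches rows in small batches. The Scan object can be restricted to specific columns and given a larger caching size to cut down the number of RPC round trips; a minimal sketch, where the info:name column and the caching value of 100 are illustrative assumptions:
Scan scan = new Scan();
// Only return info:name instead of whole rows (illustrative column)
scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
// Let each RPC bring back up to 100 rows (assumed value, tune for your data size)
scan.setCaching(100);
ResultScanner scanner = table.getScanner(scan);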
2.9 Deleting Data
/**
 * Delete data
 * @param tableName
 * @param rowkey
 * @param cf column family
 * @param cn column qualifier
 */
public static void deleteData(String tableName, String rowkey, String cf, String cn) throws IOException {
    // 1. Get the table object
    Table table = connection.getTable(TableName.valueOf(tableName));
    // 2. Build the Delete object (with no further calls it would delete the whole row)
    Delete delete = new Delete(Bytes.toBytes(rowkey));
    // 2.1 Choose what to delete
    // addColumn deletes only the latest version of the given column.
    // This interacts awkwardly with the flush mechanism, so use it with care in production.
    //delete.addColumn(Bytes.toBytes(cf), Bytes.toBytes(cn));
    // addColumns deletes all versions of the given column.
    // A timestamp for the delete can also be supplied explicitly.
    //delete.addColumns(Bytes.toBytes(cf), Bytes.toBytes(cn));
    // 2.2 Delete the whole column family
    delete.addFamily(Bytes.toBytes(cf));
    // 3. Execute the delete
    table.delete(delete);
    table.close();
}
2.10 The main Method
public static void main(String[] args) throws IOException {
    /*boolean flag1 = isTableExist("user");
    System.out.println("flag1 = " + flag1);
    createTable("user", "info1", "info2");
    dropTable("user");
    boolean flag2 = isTableExist("user");
    System.out.println("flag2 = " + flag2);
    */
    // Create a namespace
    //createNameSpace("mmdd");
    // Insert data
    //putData("student", "1001", "info", "name", "lisi");
    // Get data
    getData("student", "1001", "info", "name");
    // Release resources
    close();
}
3. HBase API and MapReduce
With the HBase Java API we can run MapReduce jobs that read from or write to HBase, for example using MapReduce to import data from the local file system into an HBase table, or reading raw data out of HBase and then analyzing it with MapReduce.
3.1 Official HBase MapReduce Jobs
# 1. View the classpath required by HBase's MapReduce jobs
bin/hbase mapredcp
# 2. Export the environment variables
## Option 1: temporary, for the current shell only (run on the command line)
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-2.7.2
export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
## Option 2: permanent, configure in /etc/profile
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-2.7.2
## and add the following to hadoop-env.sh (note: place it after the for loop) ☆
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
# Run the official MapReduce jobs
## Example 1: count how many rows the student table has
## run from the HBase home directory
/opt/module/hadoop-2.7.2/bin/yarn jar lib/hbase-server-1.3.1.jar rowcounter student
## Example 2: import local data into HBase with MapReduce
## 1) Create a tsv file locally (tab-separated fields): fruit.tsv
1001 Apple Red
1002 Pear Yellow
1003 Pineapple Yellow
## 2) Create the HBase table
create 'fruit','info'
## 3) Create the input_fruit directory in HDFS and upload fruit.tsv
/opt/module/hadoop-2.7.2/bin/hdfs dfs -mkdir /input_fruit/
/opt/module/hadoop-2.7.2/bin/hdfs dfs -put fruit.tsv /input_fruit/
## 4) Run the MapReduce import into the fruit table
/opt/module/hadoop-2.7.2/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv \
-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \
hdfs://hadoop:9000/input_fruit
## 5) Check the imported data with scan
scan 'fruit'
3.2 Writing Data to HBase with MapReduce
Write the contents of the fruit.tsv file on HDFS into the fruit1 table in HBase.
FruitMapper
package cn.itbuild.mr1;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @Date 2021/1/28 22:07
 * @Version 10.21
 * @Author DuanChaojie
 * Reads the lines of fruit.tsv and passes them through unchanged
 */
public class FruitMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        context.write(key, value);
    }
}
FruitReducer
package cn.itbuild.mr1;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

/**
 * @Date 2021/1/28 22:08
 * @Version 10.21
 * @Author DuanChaojie
 * Writes the lines read from fruit.tsv into the fruit1 table
 */
public class FruitReducer extends TableReducer<LongWritable, Text, NullWritable> {
    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // 1. Iterate over the values
        for (Text value : values) {
            // 2. Split each line on tabs
            String[] line = value.toString().split("\t");
            // 3. Build the Put object with the row key
            Put put = new Put(Bytes.toBytes(line[0]));
            // 4. Fill in the Put object (column 1 is the name, column 2 the color)
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(line[1]));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(line[2]));
            // 5. Write out
            context.write(NullWritable.get(), put);
        }
    }
}
FruitDriver
package cn.itbuild.mr1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @Date 2021/1/28 22:08
 * @Version 10.21
 * @Author DuanChaojie
 * Assembles and submits the Job
 */
public class FruitDriver implements Tool {
    // Configuration injected by ToolRunner
    private Configuration configuration = null;

    public int run(String[] args) throws Exception {
        // 1. Get the Job object
        Job job = Job.getInstance(configuration);
        // 2. Set the driver class
        job.setJarByClass(FruitDriver.class);
        // 3. Set the Mapper and its output KV types
        job.setMapperClass(FruitMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // 4. Set the Reducer (args[1] is the output table name)
        TableMapReduceUtil.initTableReducerJob(args[1],
                FruitReducer.class,
                job);
        // 5. Set the input path (args[0] is the tsv file on HDFS)
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // 6. Submit the job
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    public void setConf(Configuration conf) {
        configuration = conf;
    }

    public Configuration getConf() {
        return configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            int run = ToolRunner.run(configuration, new FruitDriver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Package the project and run the job:
# If the target table does not exist yet, create it before running the job
create 'fruit1','info'
# Upload hbase-demo-1.0-SNAPSHOT.jar to the HBase home directory
yarn jar hbase-demo-1.0-SNAPSHOT.jar cn.itbuild.mr1.FruitDriver /input_fruit/fruit.tsv fruit1
3.3 Reading the fruit1 Table and Writing to the fruit2 Table with MapReduce
Fruit2Mapper
package cn.itbuild.mr2;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * @Date 2021/1/28 22:49
 * @Version 10.21
 * @Author DuanChaojie
 */
public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // Build a Put object keyed on the current row key
        Put put = new Put(key.get());
        // 1. Iterate over the cells of the row
        for (Cell cell : value.rawCells()) {
            // 2. Only keep cells from the name column
            if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                // 3. Add the cell to the Put object
                put.add(cell);
            }
        }
        // 4. Write out
        context.write(key, put);
    }
}
Fruit2Reducer
package cn.itbuild.mr2;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

/**
 * @Date 2021/1/28 22:50
 * @Version 10.21
 * @Author DuanChaojie
 */
public class Fruit2Reducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    /**
     * Note: this Iterable<Put> is not the same as a JDK collection iterator,
     * because of the volume of data MapReduce has to process.
     */
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Write out every Put
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
Fruit2Driver
package cn.itbuild.mr2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @Date 2021/1/28 22:50
 * @Version 10.21
 * @Author DuanChaojie
 */
public class Fruit2Driver implements Tool {
    // Configuration injected by ToolRunner
    private Configuration configuration = null;

    public int run(String[] args) throws Exception {
        // 1. Get the Job object
        Job job = Job.getInstance(configuration);
        // 2. Set the driver class
        job.setJarByClass(Fruit2Driver.class);
        // 3. Set the Mapper and its output KV types; args[0] is the input table,
        //    so no FileInputFormat input path is needed
        TableMapReduceUtil.initTableMapperJob(args[0], new Scan(), Fruit2Mapper.class, ImmutableBytesWritable.class, Put.class, job);
        // 4. Set the Reducer; args[1] is the output table
        TableMapReduceUtil.initTableReducerJob(args[1],
                Fruit2Reducer.class,
                job);
        // 5. Submit the job
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    public void setConf(Configuration conf) {
        configuration = conf;
    }

    public Configuration getConf() {
        return configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            int run = ToolRunner.run(configuration, new Fruit2Driver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
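For debugging it can be handy to run Fruit2Driver straight from the IDE instead of packaging it and submitting with yarn. A minimal sketch, assuming hbase-site.xml (containing the ZooKeeper quorum) has been copied into src/main/resources so it ends up on the classpath, and that org.apache.hadoop.hbase.HBaseConfiguration is imported; only main changes:
public static void main(String[] args) {
    try {
        // HBaseConfiguration.create() loads hbase-site.xml from the classpath,
        // so a local run can locate ZooKeeper/HBase (assumes the file is in src/main/resources)
        Configuration configuration = HBaseConfiguration.create();
        int run = ToolRunner.run(configuration, new Fruit2Driver(), new String[]{"fruit1", "fruit2"});
        System.exit(run);
    } catch (Exception e) {
        e.printStackTrace();
    }
}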
Package the project and run the job:
# Create the fruit2 table in advance
create 'fruit2','info'
# Upload the re-packaged hbase-demo-1.0-SNAPSHOT.jar to the HBase home directory
yarn jar hbase-demo-1.0-SNAPSHOT.jar cn.itbuild.mr2.Fruit2Driver fruit1 fruit2