文章目录
一、HBase Shell操作
- 创建表
hbase(main):002:0> create 'student','info'
- 插入数据到表
hbase(main):003:0> put 'student','1001','info:sex','male'
hbase(main):004:0> put 'student','1001','info:age','18'
hbase(main):005:0> put 'student','1002','info:age','20'
hbase(main):006:0> put 'student','1002','info:sex','female'
hbase(main):007:0> put 'student','1002','info:name','Rose'
- 扫描查看表数据
hbase(main):008:0> scan 'student'
ROW COLUMN+CELL
1001 column=info:age, timestamp=1592878691918, value=18
1001 column=info:sex, timestamp=1592878652665, value=male
1002 column=info:age, timestamp=1592878712127, value=20
1002 column=info:name, timestamp=1592878742670, value=Rose
1002 column=info:sex, timestamp=1592878727779, value=female
hbase(main):010:0> scan 'student',{STARTROW => '1001', STOPROW => '1002'}
ROW COLUMN+CELL
1001 column=info:age, timestamp=1592878691918, value=18
hbase(main):013:0> scan 'student',{STARTROW => '1001'}
ROW COLUMN+CELL
1001 column=info:age, timestamp=1592878691918, value=18
1001 column=info:sex, timestamp=1592878652665, value=male
1002 column=info:age, timestamp=1592878712127, value=20
1002 column=info:name, timestamp=1592878742670, value=Rose
1002 column=info:sex, timestamp=1592878727779, value=female
STARTROW
和STOPROW
均需要大写;扫描范围是前闭后开(包含STARTROW,不包含STOPROW;两者相等时退化为查询单行)
- 查看表结构
hbase(main):014:0> describe 'student'
Table student is ENABLED
student
COLUMN FAMILIES DESCRIPTION
{NAME => 'info', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false',
KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER',
COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536',
REPLICATION_SCOPE => '0'}
- 更新指定字段的数据
hbase(main):015:0> put 'student','1001','info:name','Nick'
hbase(main):016:0> put 'student','1001','info:age','100'
hbase(main):017:0> scan 'student',{STARTROW => '1001', STOPROW => '1001'}
ROW COLUMN+CELL
1001 column=info:age, timestamp=1592879968475, value=100
1001 column=info:name, timestamp=1592879961347, value=Nick
1001 column=info:sex, timestamp=1592878652665, value=male
- 查看“指定行”或“指定列族:列”的数据
hbase(main):019:0> get 'student','1001'
COLUMN CELL
info:age timestamp=1592879968475, value=100
info:name timestamp=1592879961347, value=Nick
info:sex timestamp=1592878652665, value=male
hbase(main):020:0> get 'student','1001','info:name'
COLUMN CELL
info:name timestamp=1592879961347, value=Nick
- 统计表数据行数
hbase(main):021:0> count 'student'
=> 2
-
删除数据
删除某rowkey
的全部数据:hbase(main):016:0> deleteall 'student','1001'
删除某rowkey
的某一列数据:hbase(main):017:0> delete 'student','1002','info:sex'
-
清空表数据
hbase(main):018:0> truncate 'student'
提示:清空表的操作顺序为先disable
,然后再truncate
。
- 删除表
首先需要先让该表为disable
状态:hbase(main):019:0> disable 'student'
然后才能drop
这个表:hbase(main):020:0> drop 'student'
提示:如果直接drop
表,会报错:ERROR: Table student is enabled. Disable it first
。
- 变更表信息
将info
列族中的数据存放3个版本:
hbase(main):022:0> alter 'student',{NAME=>'info',VERSIONS=>3}
hbase(main):023:0> put 'student','1001','info:age','50'
hbase(main):024:0> put 'student','1001','info:age','20'
hbase(main):025:0> get 'student','1001',{COLUMN=>'info:age',VERSIONS=>3}
COLUMN CELL
info:age timestamp=1592881081492, value=20
info:age timestamp=1592880933902, value=50
info:age timestamp=1592879968475, value=100
hbase(main):026:0> get 'student','1001','info:age'
COLUMN CELL
info:age timestamp=1592881081492, value=20
二、HBase API操作
HBaseAdmin
用于操作表级别、Table
用于操作表数据。
①获取Configuration
、HBaseAdmin
、Connection
对象
/**
 * Shared HBase client bootstrap. A static initializer builds the
 * Configuration (ZooKeeper quorum + client port), opens the Connection and
 * obtains the HBaseAdmin handle that the helper methods reuse.
 */
public class HBaseTest {
public static Configuration conf;
// Used for table-level operations (create / delete / exists)
public static HBaseAdmin admin;
// Used for data-level access via connection.getTable(...)
public static Connection connection;
static {
try {
conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "hadoop100,hadoop101,hadoop102");
conf.set("hbase.zookeeper.property.clientPort", "2181");
connection = ConnectionFactory.createConnection(conf);
// getAdmin() returns Admin; the cast keeps the HBaseAdmin field type
admin = (HBaseAdmin) connection.getAdmin();
} catch (IOException e) {
// NOTE(review): failure is only logged; later calls will NPE on admin/connection — consider failing fast
e.printStackTrace();
}
}
}
②判断表是否存在
/**
 * Checks whether a table exists.
 *
 * @param tableName name of the table to look up
 * @return true if the table exists
 * @throws IOException if the admin call fails
 */
public static boolean isTableExist(String tableName) throws IOException {
    // Use the TableName overload; tableExists(String) is deprecated in HBase 1.x.
    return admin.tableExists(TableName.valueOf(tableName));
}
③创建表
/**
 * Creates a table with the given column families; does nothing (beyond a
 * message) when the table already exists.
 *
 * @param tableName    table to create
 * @param columnFamily one or more column family names
 * @throws IOException if the admin call fails
 */
public static void createTable(String tableName, String... columnFamily) throws IOException {
    if (isTableExist(tableName)) {
        System.out.println("表 " + tableName + " 已存在");
        return;
    }
    // Describe the new table and attach every requested column family.
    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
    for (String family : columnFamily) {
        tableDesc.addFamily(new HColumnDescriptor(family));
    }
    admin.createTable(tableDesc);
    System.out.println("表 " + tableName + " 创建成功");
}
④删除表
/**
 * Drops a table if it exists. A table must be disabled before it can be
 * deleted, so this disables it first.
 *
 * @param tableName table to drop
 * @throws IOException if the admin calls fail
 */
public static void deleteTable(String tableName) throws IOException {
    if (!isTableExist(tableName)) {
        System.out.println("表 " + tableName + " 不存在");
        return;
    }
    // disable, then drop — dropping an enabled table is an error.
    admin.disableTable(tableName);
    admin.deleteTable(tableName);
    System.out.println("表 " + tableName + " 删除成功");
}
⑤创建命名空间
/**
 * Creates a namespace. Tables can then be created inside it, e.g.
 * createTable("namespace:stu","info").
 *
 * @param nameSpace namespace to create
 * @throws IOException if the admin call fails
 */
public static void createNameSpace(String nameSpace) throws IOException {
    admin.createNamespace(NamespaceDescriptor.create(nameSpace).build());
}
⑥插入数据
/**
 * Inserts a single cell (rowKey / family:column = value) into a table.
 *
 * @param tableName    target table
 * @param rowKey       row key of the cell
 * @param columnFamily column family name
 * @param column       column qualifier
 * @param value        cell value
 * @throws IOException if the table call fails
 */
public static void addRowData(String tableName, String rowKey, String columnFamily,
String column, String value) throws IOException {
    // try-with-resources guarantees the Table handle is released even when put() throws
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
        Put put = new Put(Bytes.toBytes(rowKey));
        // addColumn replaces the deprecated Put.add(byte[], byte[], byte[])
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    }
    System.out.println("表 " + tableName + " 插入数据成功");
}
⑦删除多行数据
/**
 * Deletes all data for the given row keys in one batch call.
 *
 * @param tableName target table
 * @param rowKeys   row keys to delete entirely
 * @throws IOException if the table call fails
 */
public static void deleteMultiRows(String tableName, String... rowKeys) throws IOException {
    // try-with-resources: the original leaked the Table handle when delete() threw
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
        List<Delete> deleteList = new ArrayList<>(rowKeys.length);
        for (String rowKey : rowKeys) {
            deleteList.add(new Delete(Bytes.toBytes(rowKey)));
        }
        table.delete(deleteList);
    }
    System.out.println("删除多行数据成功");
}
⑧获取所有数据
/**
 * Scans the whole table and prints row key, family, qualifier and value of
 * every cell.
 *
 * @param tableName table to scan
 * @throws IOException if the scan fails
 */
public static void getAllRows(String tableName) throws IOException {
    // Both resources must be closed: the original never closed the
    // ResultScanner and leaked the Table when iteration threw.
    try (Table table = connection.getTable(TableName.valueOf(tableName));
            ResultScanner resultScanner = table.getScanner(new Scan())) {
        for (Result result : resultScanner) {
            for (Cell cell : result.rawCells()) {
                System.out.println("Row Key:" + Bytes.toString(CellUtil.cloneRow(cell)));
                System.out.println("列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
                System.out.println("列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                System.out.println("值:" + Bytes.toString(CellUtil.cloneValue(cell)));
            }
        }
    }
}
⑨获取某一行数据
/**
 * Fetches one row and prints row key, family, qualifier and value of every
 * cell in it.
 *
 * @param tableName table to read from
 * @param rowKey    row to fetch
 * @throws IOException if the get fails
 */
public static void getRow(String tableName, String rowKey) throws IOException {
    // try-with-resources: the original never closed the Table handle at all
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
        Result result = table.get(new Get(Bytes.toBytes(rowKey)));
        for (Cell cell : result.rawCells()) {
            System.out.println("Row Key:" + Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println("列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("值:" + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }
}
⑩获取某一行指定“列族:列”的数据
/**
 * Fetches a single "family:qualifier" column of one row and prints each
 * returned cell.
 *
 * @param tableName table to read from
 * @param rowKey    row to fetch
 * @param family    column family
 * @param qualifier column qualifier
 * @throws IOException if the get fails
 */
public static void getRowQualifier(String tableName, String rowKey, String family,
String qualifier) throws IOException {
    // try-with-resources: the original never closed the Table handle at all
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
        Get get = new Get(Bytes.toBytes(rowKey));
        get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
        Result result = table.get(get);
        for (Cell cell : result.rawCells()) {
            System.out.println("Row Key:" + Bytes.toString(CellUtil.cloneRow(cell)));
            System.out.println("列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("值:" + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }
}
三、MapReduce
通过HBase
的相关JavaAPI,我们可以实现伴随HBase
操作的MapReduce
过程,比如使用MapReduce
将数据从本地文件系统导入到HBase
的表中,比如我们从HBase
中读取一些原始数据后使用MapReduce
做数据分析。
3.1 官方HBase-MapReduce
1.配置环境变量
export HBASE_HOME=/opt/module/hbase-1.3.1
export HADOOP_HOME=/opt/module/hadoop-2.7.2
2.配置hadoop-env.sh
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase-1.3.1/lib/*
3.案例一:统计表有多少行数据
[root@hadoop100 hbase-1.3.1]# /opt/module/hadoop-2.7.2/bin/yarn jar \
lib/hbase-server-1.3.1.jar rowcounter student
4.案例二:使用MapReduce
将本地数据导入到HBase
- 在本地创建一个
tsv
格式的文件:fruit.tsv
1001 Apple Red
1002 Pear Yellow
1003 Pineapple Yellow
- 创建
HBase
表
hbase(main):001:0> create 'fruit','info'
- 上传
fruit.tsv
[root@hadoop100 datas]# /opt/module/hadoop-2.7.2/bin/hdfs dfs -put fruit.tsv /input_fruit/
- 执行
MapReduce
导入数据到HBase
的fruit
表中
[root@hadoop100 hbase-1.3.1]#/opt/module/hadoop-2.7.2/bin/yarn jar \
lib/hbase-server-1.3.1.jar importtsv \
-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \
hdfs://hadoop100:9000/input_fruit
3.2 自定义MapReduce将本地数据导入到HBase
mapper类:
/**
 * Maps one tab-separated line ("rowkey\tname\tcolor") of fruit.tsv to a
 * (rowkey, Put) pair for the downstream TableReducer.
 */
public class FruitMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException,
            InterruptedException {
        //1001 Apple Red
        String line = value.toString();
        if (StringUtils.isNotEmpty(line)) {
            String[] fields = line.split("\t");
            // Guard against malformed lines; the original threw
            // ArrayIndexOutOfBoundsException on lines with fewer than 3 fields.
            if (fields.length < 3) {
                return;
            }
            String rowKey = fields[0];
            String name = fields[1];
            String color = fields[2];
            //ImmutableBytesWritable is the type used for the row key
            ImmutableBytesWritable k = new ImmutableBytesWritable(Bytes.toBytes(rowKey));
            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(color));
            context.write(k, put);
        }
    }
}
Reduce类:
/**
 * Identity reducer: forwards every incoming Put unchanged so that the
 * TableOutputFormat writes it into the target HBase table.
 */
public class FruitReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // One write per Put; the key is irrelevant for table output.
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
驱动Driver:
/**
 * Driver for the HDFS-to-HBase import job: reads fruit.tsv from HDFS and
 * loads it into the "fruit" table via FruitMapper / FruitReducer.
 */
public class FruitDriver extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {
        // Build the job from the injected configuration.
        Job job = Job.getInstance(this.getConf());
        job.setJarByClass(FruitDriver.class);
        // Mapper emits <ImmutableBytesWritable, Put> pairs.
        job.setMapperClass(FruitMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        // Wire the reducer so its output lands in the "fruit" table.
        TableMapReduceUtil.initTableReducerJob("fruit", FruitReducer.class, job);
        // Input file on HDFS.
        FileInputFormat.addInputPath(job,
                new Path("hdfs://hadoop100:9000/input_fruit/fruit.tsv"));
        // Submit and block until completion.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(HBaseConfiguration.create(), new FruitDriver(), args));
    }
}
打包运行:
[root@hadoop100 hbase-1.3.1]# /opt/module/hadoop-2.7.2/bin/yarn jar hbase-1.0-SNAPSHOT.jar \
com.hucheng.mr.dfs2hbase.FruitDriver
3.3 从HBase导入数据到HBase
mapper类:
/**
 * Copies only the info:name cells of each source row into a Put for the
 * destination table.
 */
public class FruitMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        Put put = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell))) &&
                    "name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                put.add(cell);
            }
        }
        // A Put with no cells fails when written ("No columns to insert"),
        // so skip rows that had no info:name cell.
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}
Reduce类:
/**
 * Identity reducer: passes each Put through to the table output format
 * unchanged.
 */
public class FruitReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // The output key is ignored by TableOutputFormat; only the Put matters.
        for (Put mutation : values) {
            context.write(NullWritable.get(), mutation);
        }
    }
}
驱动Driver:
/**
 * Driver for the HBase-to-HBase copy job: scans the "fruit" table and writes
 * the filtered cells into "fruit2" via FruitMapper / FruitReducer.
 */
public class FruitDriver extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = this.getConf();
        //1. Obtain the Job object
        Job job = Job.getInstance(conf);
        //2. Set the driver class
        job.setJarByClass(FruitDriver.class);
        //3. Configure the scan: no block caching for a full MR scan, larger caching batch
        Scan scan = new Scan();
        scan.setCacheBlocks(false);
        scan.setCaching(500);
        //4. Set the Mapper via TableMapReduceUtil
        TableMapReduceUtil.initTableMapperJob(
                "fruit",                       // source table
                scan,                          // scan controlling the read
                FruitMapper.class,             // mapper class
                ImmutableBytesWritable.class,  // mapper output key
                Put.class,                     // mapper output value
                job
        );
        //5. Set the Reducer, writing into the destination table
        TableMapReduceUtil.initTableReducerJob("fruit2", FruitReducer.class, job);
        //6. Submit the job
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        // Bug fix: the original ran com.hucheng.mr.dfs2hbase.FruitDriver (the
        // file-import driver) instead of this hbase2hbase driver.
        int status = ToolRunner.run(configuration, new FruitDriver(), args);
        System.exit(status);
    }
}
打包运行:
[root@hadoop100 hbase-1.3.1]# /opt/module/hadoop-2.7.2/bin/yarn jar hbase-1.0-SNAPSHOT.jar \
com.hucheng.mr.hbase2hbase.FruitDriver