HBase Java API Development
Goal: become proficient at using Java code to create, read, update, and delete data in HBase, with particular emphasis on the various query patterns.
Step 1: create a Maven project and import the dependencies
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0-mr1-cdh5.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.0-cdh5.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.2.0-cdh5.14.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.14.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>RELEASE</version>
<scope>compile</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.2</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*/RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
Step 2: use the Java API to operate on HBase table data
1. Create the table myuser
public static void createTable() throws IOException {
Configuration conf =new Configuration();
//connecting to an HBase cluster does not require the HMaster's IP address and port; the ZooKeeper quorum is enough
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
//create the connection object
Connection connection = ConnectionFactory.createConnection(conf);
//get an Admin object from the connection to perform DDL operations against the database
Admin admin = connection.getAdmin();
//specify the table name
TableName myuser = TableName.valueOf("myuser");
HTableDescriptor hTableDescriptor = new HTableDescriptor(myuser);
//define two column families
HColumnDescriptor f1 = new HColumnDescriptor("f1");
HColumnDescriptor f2 = new HColumnDescriptor("f2");
hTableDescriptor.addFamily(f1);
hTableDescriptor.addFamily(f2);
admin.createTable(hTableDescriptor);
admin.close();
connection.close();
}
2. Insert data into the table
/**
* insert data
*/
public void addDatas() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181");
Connection connection = ConnectionFactory.createConnection(conf);
//get the table
Table myuser = connection.getTable(TableName.valueOf("myuser"));
//create a Put object and specify the rowkey
Put put = new Put("0001".getBytes());
put.addColumn("f1".getBytes(),"id".getBytes(), Bytes.toBytes(1));
put.addColumn("f1".getBytes(),"name".getBytes(), Bytes.toBytes("张三"));
put.addColumn("f1".getBytes(),"age".getBytes(), Bytes.toBytes(18));
put.addColumn("f2".getBytes(),"address".getBytes(), Bytes.toBytes("地球人"));
put.addColumn("f2".getBytes(),"phone".getBytes(), Bytes.toBytes("15874102589"));
//write the data
myuser.put(put);
//close the table
myuser.close();
}
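For write-heavy workloads, the 1.x client also provides BufferedMutator, which buffers Puts on the client and sends them to the server in batches instead of issuing one RPC per Put. Below is a minimal sketch, assuming the same myuser table and ZooKeeper quorum used throughout this section; the generated rowkeys are illustrative only:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
public class BufferedWriteDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             //BufferedMutator buffers Puts client-side and flushes them in batches
             BufferedMutator mutator = connection.getBufferedMutator(TableName.valueOf("myuser"))) {
            List<Put> puts = new ArrayList<>();
            for (int i = 10; i < 20; i++) {
                //illustrative rowkeys "0010".."0019"
                Put put = new Put(Bytes.toBytes("00" + i));
                put.addColumn("f1".getBytes(), "id".getBytes(), Bytes.toBytes(i));
                puts.add(put);
            }
            mutator.mutate(puts);
            //any remaining buffered mutations are flushed when the mutator is closed
        }
    }
}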
3. Query data
First, initialize a batch of data into HBase to query against:
public void insertBatchData() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181");
Connection connection = ConnectionFactory.createConnection(conf);
//get the table
Table myuser = connection.getTable(TableName.valueOf("myuser"));
//create Put objects and specify the rowkeys
Put put = new Put("0002".getBytes());
put.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(1));
put.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("曹操"));
put.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(30));
put.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));
put.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("沛国谯县"));
put.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("16888888888"));
put.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("helloworld"));
Put put2 = new Put("0003".getBytes());
put2.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(2));
put2.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("刘备"));
put2.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(32));
put2.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));
put2.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("幽州涿郡涿县"));
put2.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("17888888888"));
put2.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("talk is cheap , show me the code"));
Put put3 = new Put("0004".getBytes());
put3.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(3));
put3.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("孙权"));
put3.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(35));
put3.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));
put3.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("下邳"));
put3.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("12888888888"));
put3.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("what are you 弄啥嘞!"));
Put put4 = new Put("0005".getBytes());
put4.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(4));
put4.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("诸葛亮"));
put4.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(28));
put4.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));
put4.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("四川隆中"));
put4.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("14888888888"));
put4.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("出师表你背了嘛"));
Put put5 = new Put("0006".getBytes());
put5.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(5));
put5.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("司马懿"));
put5.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(27));
put5.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));
put5.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("哪里人有待考究"));
put5.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("15888888888"));
put5.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("跟诸葛亮死掐"));
Put put6 = new Put("0007".getBytes());
put6.addColumn("f1".getBytes(),"id".getBytes(),Bytes.toBytes(6));
put6.addColumn("f1".getBytes(),"name".getBytes(),Bytes.toBytes("xiaobubu—吕布"));
put6.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(28));
put6.addColumn("f2".getBytes(),"sex".getBytes(),Bytes.toBytes("1"));
put6.addColumn("f2".getBytes(),"address".getBytes(),Bytes.toBytes("内蒙人"));
put6.addColumn("f2".getBytes(),"phone".getBytes(),Bytes.toBytes("15788888888"));
put6.addColumn("f2".getBytes(),"say".getBytes(),Bytes.toBytes("貂蝉去哪了"));
List<Put> listPut = new ArrayList<Put>();
listPut.add(put);
listPut.add(put2);
listPut.add(put3);
listPut.add(put4);
listPut.add(put5);
listPut.add(put6);
myuser.put(listPut);
myuser.close();
}
Query all column values of a row by rowkey
Query the row whose rowkey is 0003:
/**
* query data by rowkey
*/
public void searchData() throws IOException {
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Get get = new Get(Bytes.toBytes("0003"));
Result result = myuser.get(get);
Cell[] cells = result.rawCells();
//print every column name and column value
for (Cell cell : cells) {
//note: int-typed columns (id and age) must be decoded with Bytes.toInt, otherwise the value will not display correctly
if (Bytes.toString(CellUtil.cloneQualifier(cell)).equals("id" )|| Bytes.toString(CellUtil.cloneQualifier(cell)).equals("age")){
System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
System.out.println(Bytes.toInt(CellUtil.cloneValue(cell)));
}else {
System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
}
}
myuser.close();
}
Query the value of a given column in a given column family by rowkey
public static void searchdata2() throws IOException{
Configuration conf=new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Get get=new Get("0003".getBytes());
// get.addFamily("f1".getBytes());
get.addColumn("f1".getBytes(),"name".getBytes());
Result result = myuser.get(get);
Cell[] cells = result.rawCells();
for (Cell cell : cells) {
if (Bytes.toString(CellUtil.cloneQualifier(cell)).equals("id" )|| Bytes.toString(CellUtil.cloneQualifier(cell)).equals("age")){
System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
System.out.println(Bytes.toInt(CellUtil.cloneValue(cell)));
}else {
System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
}
}
myuser.close();
}
Scan by startRow and stopRow
/**
* scan query using startRow (inclusive) and stopRow (exclusive)
*/
public static void scanrowkey() throws IOException{
Configuration conf=new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Scan scan =new Scan();
scan.setStartRow("0002".getBytes());
scan.setStopRow("0006".getBytes());
ResultScanner scanner = myuser.getScanner(scan);
for (Result result : scanner) {
System.out.println("rowkey "+Bytes.toString(result.getRow()));
/*
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(),"id".getBytes())));
System.out.println(Bytes.toString(result.getValue("f1".getBytes(),"name".getBytes())));
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(),"age".getBytes())));
*/
Cell[] cells = result.rawCells();
for (Cell cell : cells) {
if (Bytes.toString(CellUtil.cloneQualifier(cell)).equals("id" )|| Bytes.toString(CellUtil.cloneQualifier(cell)).equals("age")){
System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
System.out.println(Bytes.toInt(CellUtil.cloneValue(cell)));
}else {
System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
}
}
}
myuser.close();
}
Full table scan with Scan
/**
* full table scan
*/
public static void scanrow() throws IOException{
Configuration conf=new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Scan scan =new Scan();
ResultScanner scanner = myuser.getScanner(scan);
for (Result result : scanner) {
System.out.println("rowkey "+Bytes.toString(result.getRow()));
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(),"id".getBytes())));
System.out.println(Bytes.toString(result.getValue("f1".getBytes(),"name".getBytes())));
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(),"age".getBytes())));
/* Cell[] cells = result.rawCells();
for (Cell cell : cells) {
if (Bytes.toString(CellUtil.cloneQualifier(cell)).equals("id" )|| Bytes.toString(CellUtil.cloneQualifier(cell)).equals("age")){
System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
System.out.println(Bytes.toInt(CellUtil.cloneValue(cell)));
}else {
System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(Bytes.toString(CellUtil.cloneRow(cell)));
System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
}
}*/
}
myuser.close();
}
4. Querying with filters
There are many filter types, but they fall into two broad categories: comparison filters and dedicated filters.
A filter evaluates its condition on the server side, so only the data that satisfies the condition is returned to the client.
Comparison operators for HBase filters:
LESS               <
LESS_OR_EQUAL      <=
EQUAL              =
NOT_EQUAL          <>
GREATER_OR_EQUAL   >=
GREATER            >
NO_OP              excludes everything
Comparators for HBase filters (they specify the comparison mechanism):
BinaryComparator        compares against the given byte array, using Bytes.compareTo(byte[])
BinaryPrefixComparator  same as above, but only compares whether the left-hand prefix matches
NullComparator          checks whether the given value is null
BitComparator           performs a bitwise comparison
RegexStringComparator   regular-expression comparator; only supports EQUAL and NOT_EQUAL
SubstringComparator     checks whether the given substring appears in the value
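As a quick illustration of pairing an operator with a comparator before going through the individual filter types, here is a minimal sketch, assuming the same myuser table and ZooKeeper quorum used throughout this section. It uses a ValueFilter with a RegexStringComparator to keep only cells whose value looks like an 11-digit phone number (the regex is illustrative):
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
public class RegexValueScan {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table myuser = connection.getTable(TableName.valueOf("myuser"))) {
            Scan scan = new Scan();
            //keep only cells whose value matches an 11-digit phone number
            scan.setFilter(new ValueFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^1[0-9]{10}$")));
            try (ResultScanner scanner = myuser.getScanner(scan)) {
                for (Result result : scanner) {
                    System.out.println(Bytes.toString(result.getRow()) + " " + Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));
                }
            }
        }
    }
}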
1. Comparison filters
1. Rowkey filter: RowFilter
Use RowFilter to return all rows whose rowkey is less than or equal to 0003:
/**
* HBase rowkey filter: RowFilter
*/
public void rowKeyFilter() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Scan scan = new Scan();
RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("0003")));
scan.setFilter(rowFilter);
ResultScanner resultScanner = myuser.getScanner(scan);
for (Result result : resultScanner) {
//print the rowkey
System.out.println(Bytes.toString(result.getRow()));
//print the values of specific columns in specific column families
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "age".getBytes())));
System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));
}
myuser.close();
}
2. Column family filter: FamilyFilter
Query the data in every column family that sorts lower than f2:
/**
* HBase column family filter: FamilyFilter
*/
public void familyFilter() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Scan scan = new Scan();
//SubstringComparator only supports EQUAL and NOT_EQUAL, so use BinaryComparator for an ordering comparison
FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.LESS, new BinaryComparator("f2".getBytes()));
scan.setFilter(familyFilter);
ResultScanner resultScanner = myuser.getScanner(scan);
for (Result result : resultScanner) {
//print the rowkey
System.out.println(Bytes.toString(result.getRow()));
//print the values of specific columns in specific column families
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "age".getBytes())));
System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));
}
myuser.close();
}
3. Column qualifier filter: QualifierFilter
Query only the value of the name column:
/**
* HBase column qualifier filter
*/
public void qualifierFilter() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Scan scan = new Scan();
QualifierFilter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("name"));
scan.setFilter(qualifierFilter);
ResultScanner resultScanner = myuser.getScanner(scan);
for (Result result : resultScanner) {
//print the rowkey
System.out.println(Bytes.toString(result.getRow()));
//print the values of specific columns in specific column families
// System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));
System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));
}
myuser.close();
}
4. Value filter: ValueFilter
Query all cells whose value contains the character 8:
/**
* HBase value filter
* query cell values containing "8"
*/
public void valueFilter() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Scan scan = new Scan();
ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("8"));
scan.setFilter(valueFilter);
ResultScanner resultScanner = myuser.getScanner(scan);
for (Result result : resultScanner) {
//print the rowkey
System.out.println(Bytes.toString(result.getRow()));
//print the values of specific columns in specific column families
// System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));
System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));
}
myuser.close();
}
2. Dedicated filters
1. Single column value filter: SingleColumnValueFilter
SingleColumnValueFilter returns all columns of every row in which the specified column satisfies the condition
/**
* single column value filter: returns the entire row of data when the condition matches
*/
public void singleColumnFilter() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Scan scan = new Scan();
SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("f1".getBytes(), "name".getBytes(), CompareFilter.CompareOp.EQUAL, "刘备".getBytes());
scan.setFilter(singleColumnValueFilter);
ResultScanner resultScanner = myuser.getScanner(scan);
for (Result result : resultScanner) {
//print the rowkey
System.out.println(Bytes.toString(result.getRow()));
//print the values of specific columns in specific column families
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));
System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));
System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));
}
myuser.close();
}
2. Single column value exclude filter: SingleColumnValueExcludeFilter
The opposite of SingleColumnValueFilter: it selects the same rows, but excludes the specified column from the result and returns all the other columns, as in the sketch below.
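A minimal usage sketch, assuming the same myuser table and quorum as the other examples (the printed columns are illustrative):
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueExcludeFilter;
import org.apache.hadoop.hbase.util.Bytes;
public class ExcludeFilterDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table myuser = connection.getTable(TableName.valueOf("myuser"))) {
            Scan scan = new Scan();
            //select rows where f1:name = 刘备, but leave the name column itself out of the result
            scan.setFilter(new SingleColumnValueExcludeFilter("f1".getBytes(), "name".getBytes(), CompareFilter.CompareOp.EQUAL, "刘备".getBytes()));
            try (ResultScanner scanner = myuser.getScanner(scan)) {
                for (Result result : scanner) {
                    System.out.println(Bytes.toString(result.getRow()));
                    //name is excluded, so this prints null; phone is still returned
                    System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));
                    System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));
                }
            }
        }
    }
}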
3. Rowkey prefix filter: PrefixFilter
Query all rows whose rowkey starts with 00:
/**
* rowkey prefix filter
*/
public void preFilter() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Scan scan = new Scan();
PrefixFilter prefixFilter = new PrefixFilter("00".getBytes());
scan.setFilter(prefixFilter);
ResultScanner resultScanner = myuser.getScanner(scan);
for (Result result : resultScanner) {
//print the rowkey
System.out.println(Bytes.toString(result.getRow()));
//print the values of specific columns in specific column families
System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));
System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));
System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));
}
myuser.close();
}
4. Paging filter: PageFilter
Implement paging with PageFilter:
public void pageFilter2() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
int pageNum = 3;
int pageSize = 2;
Scan scan = new Scan();
//page 1: scan from the very first row and return pageSize rows
if (pageNum == 1) {
PageFilter filter = new PageFilter(pageSize);
scan.setStartRow(Bytes.toBytes(""));
scan.setFilter(filter);
scan.setMaxResultSize(pageSize);
ResultScanner scanner = myuser.getScanner(scan);
for (Result result : scanner) {
//print the rowkey
System.out.println(Bytes.toString(result.getRow()));
//print the values of specific columns in specific column families
// System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));
System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));
//System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));
}
}else{
//for later pages: first scan (pageNum - 1) * pageSize + 1 rows from the beginning;
//the last row returned is the start row of the requested page
String startRowKey ="";
PageFilter filter = new PageFilter((pageNum - 1) * pageSize + 1 );
scan.setStartRow(startRowKey.getBytes());
scan.setMaxResultSize((pageNum - 1) * pageSize + 1);
scan.setFilter(filter);
ResultScanner scanner = myuser.getScanner(scan);
for (Result result : scanner) {
byte[] row = result.getRow();
startRowKey = new String(row);
}
//second scan: start from the first row of the requested page and return pageSize rows
Scan scan2 = new Scan();
scan2.setStartRow(startRowKey.getBytes());
scan2.setMaxResultSize(Long.valueOf(pageSize));
PageFilter filter2 = new PageFilter(pageSize);
scan2.setFilter(filter2);
ResultScanner scanner1 = myuser.getScanner(scan2);
for (Result result : scanner1) {
byte[] row = result.getRow();
System.out.println(new String(row));
}
}
myuser.close();
}
3. Combining filters with FilterList
Requirement: use a SingleColumnValueFilter to select rows where name in column family f1 is 刘备, and at the same time require the rowkey to start with the prefix 00 (PrefixFilter)
/**
* combining multiple filters
*/
public void manyFilter() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Scan scan = new Scan();
FilterList filterList = new FilterList();
SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("f1".getBytes(), "name".getBytes(), CompareFilter.CompareOp.EQUAL, "刘备".getBytes());
PrefixFilter prefixFilter = new PrefixFilter("00".getBytes());
filterList.addFilter(singleColumnValueFilter);
filterList.addFilter(prefixFilter);
scan.setFilter(filterList);
ResultScanner scanner = myuser.getScanner(scan);
for (Result result : scanner) {
//print the rowkey
System.out.println(Bytes.toString(result.getRow()));
//print the values of specific columns in specific column families
// System.out.println(Bytes.toInt(result.getValue("f1".getBytes(), "id".getBytes())));
System.out.println(Bytes.toString(result.getValue("f1".getBytes(), "name".getBytes())));
//System.out.println(Bytes.toString(result.getValue("f2".getBytes(), "phone".getBytes())));
}
myuser.close();
}
5. Delete data by rowkey
/**
* delete data
*/
public void deleteByRowKey() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Table myuser = connection.getTable(TableName.valueOf("myuser"));
Delete delete = new Delete("0001".getBytes());
myuser.delete(delete);
myuser.close();
}
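A Delete does not have to remove the entire row; it can also target a single column or a whole column family. A minimal sketch under the same assumptions as above (the rowkey and columns here are illustrative):
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Table;
public class DeleteColumnDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table myuser = connection.getTable(TableName.valueOf("myuser"))) {
            Delete delete = new Delete("0002".getBytes());
            //remove every version of the single column f2:say instead of the whole row
            delete.addColumns("f2".getBytes(), "say".getBytes());
            //delete.addFamily("f2".getBytes()); //or remove an entire column family
            myuser.delete(delete);
        }
    }
}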
6. Drop a table
public void deleteTable() throws IOException {
//get a connection
Configuration conf =new Configuration();
conf.set("hbase.zookeeper.quorum","node01:2181,node02:2181,node03:2181");
Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin();
//a table must be disabled before it can be deleted
admin.disableTable(TableName.valueOf("myuser"));
admin.deleteTable(TableName.valueOf("myuser"));
admin.close();
}
14. Integrating HBase with MapReduce
HBase data is ultimately stored on HDFS, and HBase supports MapReduce natively: MR jobs can process HBase data directly, and the results can be written straight back into HBase.
Requirement: read the data of one HBase table and write it into another HBase table. Note: we can use TableMapper and TableReducer to read from and write to HBase.
Here we write the name and age columns of column family f1 in the myuser table into column family f1 of the myuser2 table.
http://archive.cloudera.com/cdh5/cdh/5/hbase-1.2.0-cdh5.14.0/book.html#mapreduce
Requirement 1: read the data in the myuser table and write it into another HBase table
Step 1: create the myuser2 table
Note: the column family name must match the column family name in the myuser table
hbase(main):010:0> create 'myuser2','f1'
Step 2: create a Maven project and import the dependencies
(Use the same repositories, dependencies, and build plugins as the pom configuration shown in Step 1 at the top of this section.)
Step 3: develop the MR program
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class HBaseMR extends Configured implements Tool{
public static class HBaseMapper extends TableMapper<Text,Put>{
/**
*
* @param key the rowkey of the current row
* @param value all the column values of the row, wrapped in a Result
* @param context
* @throws IOException
* @throws InterruptedException
*/
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
byte[] bytes = key.get();
String rowKey = Bytes.toString(bytes);
Put put = new Put(key.get());
Cell[] cells = value.rawCells();
for (Cell cell : cells) {
if("f1".equals(Bytes.toString(CellUtil.cloneFamily(cell)))){
if("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))){
put.add(cell);
}
if("age".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))){
put.add(cell);
}
}
}
if(!put.isEmpty()){
context.write(new Text(rowKey),put);
}
}
}
public static class HBaseReducer extends TableReducer<Text,Put,ImmutableBytesWritable>{
@Override
protected void reduce(Text key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
for (Put value : values) {
context.write(null,value);
}
}
}
@Override
public int run(String[] args) throws Exception {
Job job = Job.getInstance(super.getConf(), "hbaseMr");
job.setJarByClass(this.getClass());
Scan scan = new Scan();
scan.setCaching(500);
scan.setCacheBlocks(false);
//use the TableMapReduceUtil utility class to initialize the mapper
TableMapReduceUtil.initTableMapperJob(TableName.valueOf("myuser"),scan,HBaseMapper.class,Text.class,Put.class,job);
//use the TableMapReduceUtil utility class to initialize the reducer
TableMapReduceUtil.initTableReducerJob("myuser2",HBaseReducer.class,job);
job.setNumReduceTasks(1);
boolean b = job.waitForCompletion(true);
return b?0:1;
}
public static void main(String[] args) throws Exception {
//create the HBase configuration
Configuration conf = HBaseConfiguration.create();
int run = ToolRunner.run(conf, new HBaseMR(), args);
System.exit(run);
}
}
Step 4: package and run
Note: the shade packaging plugin is needed so that the HBase dependency jars are bundled into the project jar.
Then run:
yarn jar hbaseStudy-1.0-SNAPSHOT.jar cn.itcast.hbasemr.HBaseMR
Alternatively, set the environment variables ourselves and run the original (unshaded) jar:
export HADOOP_HOME=/export/servers/hadoop-2.6.0-cdh5.14.0/
export HBASE_HOME=/export/servers/hbase-1.2.0-cdh5.14.0/
export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
yarn jar original-hbaseStudy-1.0-SNAPSHOT.jar cn.itcast.hbasemr.HBaseMR
Requirement 2: read an HDFS file and write it into an HBase table
Read the HDFS file /hbase/input/user.txt, whose content is as follows:
0007 zhangsan 18
0008 lisi 25
0009 wangwu 20
Step 1: prepare the data file
Prepare the data file and upload it to HDFS:
hdfs dfs -mkdir -p /hbase/input
cd /export/servers/
vim user.txt
0007 zhangsan 18
0008 lisi 25
0009 wangwu 20
Step 2: develop the MR program
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class Hdfs2Hbase extends Configured implements Tool{
@Override
public int run(String[] args) throws Exception {
Job job = Job.getInstance(super.getConf(), "hdfs2Hbase");
job.setJarByClass(Hdfs2Hbase.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("hdfs://node01:8020/hbase/input"));
job.setMapperClass(HdfsMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
TableMapReduceUtil.initTableReducerJob("myuser2",HBaseReducer.class,job);
job.setNumReduceTasks(1);
boolean b = job.waitForCompletion(true);
return b?0:1;
}
public static void main(String[] args) throws Exception {
Configuration conf =new Configuration();
int run = ToolRunner.run(conf, new Hdfs2Hbase(), args);
System.exit(run);
}
public static class HdfsMapper extends Mapper<LongWritable,Text,Text,NullWritable>{
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
context.write(value,NullWritable.get());
}
}
public static class HBaseReducer extends TableReducer<Text,NullWritable,ImmutableBytesWritable>{
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
String[] split = key.toString().split("\t");
Put put = new Put(Bytes.toBytes(split[0]));
put.addColumn("f1".getBytes(),"name".getBytes(),split[1].getBytes());
put.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(Integer.parseInt(split[2])));
context.write(new ImmutableBytesWritable(Bytes.toBytes(split[0])),put);
}
}
}
Requirement 3 (exercise): read HBase table data and write it out to HDFS. A sketch of one possible approach follows.
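This is a minimal sketch of one possible solution, assuming the same myuser table; the class name and output path are illustrative. A TableMapper reads each row, and a map-only job writes the rows to HDFS as text:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class HBase2Hdfs {
    public static class HBaseReadMapper extends TableMapper<Text, NullWritable> {
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            //emit "rowkey<TAB>name<TAB>age" as one line of text; assumes every row has f1:name and f1:age
            String name = Bytes.toString(value.getValue("f1".getBytes(), "name".getBytes()));
            int age = Bytes.toInt(value.getValue("f1".getBytes(), "age".getBytes()));
            context.write(new Text(Bytes.toString(key.get()) + "\t" + name + "\t" + age), NullWritable.get());
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");
        Job job = Job.getInstance(conf, "hbase2Hdfs");
        job.setJarByClass(HBase2Hdfs.class);
        TableMapReduceUtil.initTableMapperJob("myuser", new Scan(), HBaseReadMapper.class, Text.class, NullWritable.class, job);
        //map-only job: the mapper output is written straight to HDFS as text
        job.setNumReduceTasks(0);
        FileOutputFormat.setOutputPath(job, new Path("hdfs://node01:8020/hbase/hdfs_output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}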
Requirement 4: bulk-load data into HBase with bulkload
There are many ways to load data into HBase: we can write it with the HBase Java API or import it with sqoop, but those approaches are either slow or tie up Region resources during the import, which hurts efficiency. Alternatively, an MR program can convert the data directly into HFile, HBase's final storage format, and the HFiles can then be loaded straight into HBase.
On HDFS, each HBase table is stored under the root directory (/hbase) in a folder named after the table; inside the table folder, each Region has its own folder, and inside each Region folder every column family again has its own folder. The files in those column-family folders are HFiles, the format in which HBase data is stored on HDFS. So if we can convert our data directly into HFile format, HBase can load the resulting files as-is.
Advantages:
1. The import does not tie up Region resources
2. Massive amounts of data can be imported quickly
3. It saves memory
(Figure: the normal HBase data read/write path)
With bulkload, by contrast, the data is generated directly in HFile format and then loaded straight into the HBase table.
Requirement: convert the data file /hbase/input/user.txt on HDFS into HFile format, then load it into the myuser2 table
Step 1: define the mapper class
import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class LoadMapper extends Mapper<LongWritable,Text,ImmutableBytesWritable,Put>{
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] split = value.toString().split("\t");
Put put = new Put(Bytes.toBytes(split[0]));
put.addColumn("f1".getBytes(),"name".getBytes(),split[1].getBytes());
put.addColumn("f1".getBytes(),"age".getBytes(),Bytes.toBytes(Integer.parseInt(split[2])));
context.write(new ImmutableBytesWritable(Bytes.toBytes(split[0])),put);
}
}
Step 2: develop the main entry class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class HBaseLoad extends Configured implements Tool {
@Override
public int run(String[] args) throws Exception {
final String INPUT_PATH= "hdfs://node01:8020/hbase/input";
final String OUTPUT_PATH= "hdfs://node01:8020/hbase/output_hfile";
Configuration conf = HBaseConfiguration.create();
Connection connection = ConnectionFactory.createConnection(conf);
Table table = connection.getTable(TableName.valueOf("myuser2"));
Job job= Job.getInstance(conf);
job.setJarByClass(HBaseLoad.class);
job.setMapperClass(LoadMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
job.setOutputFormatClass(HFileOutputFormat2.class);
HFileOutputFormat2.configureIncrementalLoad(job,table,connection.getRegionLocator(TableName.valueOf("myuser2")));
FileInputFormat.addInputPath(job,new Path(INPUT_PATH));
FileOutputFormat.setOutputPath(job,new Path(OUTPUT_PATH));
boolean b = job.waitForCompletion(true);
return b?0:1;
}
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
int run = ToolRunner.run(conf, new HBaseLoad(), args);
System.exit(run);
}
}
Step 3: package the code into a jar and run it
yarn jar original-hbaseStudy-1.0-SNAPSHOT.jar cn.itcast.hbasemr.HBaseLoad
Step 4: write the code to load the data
Load the HFiles under the output path into the HBase table:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
public class LoadData {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("hbase.zookeeper.property.clientPort", "2181");
conf.set("hbase.zookeeper.quorum", "node01,node02,node03");
Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin();
Table table = connection.getTable(TableName.valueOf("myuser2"));
LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
load.doBulkLoad(new Path("hdfs://node01:8020/hbase/output_hfile"), admin,table,connection.getRegionLocator(TableName.valueOf("myuser2")));
}
}
Alternatively, the data can be loaded from the command line.
First add the HBase jars to Hadoop's classpath:
export HBASE_HOME=/export/servers/hbase-1.2.0-cdh5.14.0/
export HADOOP_HOME=/export/servers/hadoop-2.6.0-cdh5.14.0/
export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
Then run the following command to import the HFiles directly into the myuser2 table:
yarn jar /export/servers/hbase-1.2.0-cdh5.14.0/lib/hbase-server-1.2.0-cdh5.14.0.jar completebulkload /hbase/output_hfile myuser2