一、创建表
//创建表
try {
Admin admin = conn.getAdmin();
TableName tableName = TableName.valueOf("user_fengze");
if (admin.tableExists(tableName)) {// 如果存在要创建的表,那么先删除,再创建
admin.disableTable(tableName);
admin.deleteTable(tableName);
System.out.println(tableName + " is exist,detele....");
}
HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
tableDescriptor.addFamily(new HColumnDescriptor("columnfamily1"));
tableDescriptor.addFamily(new HColumnDescriptor("columnfamily2"));
admin.createTable(tableDescriptor);
二、增 数据
//增 改:put来覆盖
// 插入一条数据
Put put = new Put(Bytes.toBytes(rowkey), ts);
put.addColumn(Bytes.toBytes(family), Bytes.toBytes(column), Bytes.toBytes(value));
htable.put(put);
// 插入多条数据
Table table = conn.getTable(TableName.valueOf("tableName001"));
// 构造要插入的数据为一个Put类型(一个put对象只能对应一个rowkey)的对象
Put put = new Put(Bytes.toBytes("rowkey001"));
put.addColumn(Bytes.toBytes("columnfamily1"), Bytes.toBytes("username"), Bytes.toBytes("张三"));
put.addColumn(Bytes.toBytes("columnfamily1"), Bytes.toBytes("age"), Bytes.toBytes("18"));
put.addColumn(Bytes.toBytes("columnfamily1"), Bytes.toBytes("addr"), Bytes.toBytes("北京"));
//addColumn(byte[] family, byte[] qualifier, byte[] value)
Put put2 = new Put(Bytes.toBytes("rowkey002"));
put2.addColumn(Bytes.toBytes("columnfamily2"), Bytes.toBytes("username"), Bytes.toBytes("李四"));
put2.addColumn(Bytes.toBytes("columnfamily2"), Bytes.toBytes("age"), Bytes.toBytes("28"));
put2.addColumn(Bytes.toBytes("columnfamily2"), Bytes.toBytes("name"), Bytes.toBytes("上海"));
ArrayList<Put> puts = new ArrayList<>();
puts.add(put);
puts.add(put2);
// 插进去
table.put(puts);
table.close();
conn.close();
//循环插入大量数据
Table table = conn.getTable(TableName.valueOf("user_table"));
ArrayList<Put> puts = new ArrayList<>();
for (int i = 0; i <100; i++) {
Put put = new Put(Bytes.toBytes("rowkey"+i));
put.addColumn(Bytes.toBytes("columnfamily1"),Bytes.toBytes("name"),Bytes.toBytes("zhangsan"+i));
put.addColumn(Bytes.toBytes("columnfamily1"),Bytes.toBytes("age"),Bytes.toBytes("18"+i));
puts.add(put);
}
table.put(puts);
table.close();
conn.close();
三、删除数据
Table table = conn.getTable(TableName.valueOf("user_table"));
// 构造一个对象封装要删除的数据信息
Delete delete1 = new Delete(Bytes.toBytes("rowkey001"));
Delete delete2 = new Delete(Bytes.toBytes("rowkey002"));
//删除name列
delete2.addColumn(Bytes.toBytes("columnfamily1"),Bytes.toBytes("name"));
ArrayList<Delete> arrayList = new ArrayList();
arrayList.add(delete1);
arrayList.add(delete2);
table.delete(arrayList);
table.close();
conn.close();
二、HBase的查询实现只提供两种方式:
1、按指定RowKey获取唯一一条记录,get方法(org.apache.hadoop.hbase.client.Get)
Get 的处理分两种:设置了 ClosestRowBefore 的和没有设置的。rowlock 主要是用来保证行的事务性,即每个 get 是以一个 row 来标记的。一个 row 中可以有很多 family 和 column。
2、按指定的条件获取一批记录,scan方法(org.apache.hadoop.hbase.client.Scan)
https://www.cnblogs.com/liupengpengg/p/9197157.html
实现条件查询功能使用的就是scan方式,scan在使用时有以下几点值得注意:
1、scan可以通过setCaching与setBatch方法提高速度(以空间换时间);
2、scan可以通过setStartRow与setStopRow来限定范围。范围越小,性能越高。
通过巧妙的RowKey设计使我们批量获取记录集合中的元素挨在一起(应该在同一个Region下),可以在遍历结果时获得很好的性能。
3、scan可以通过setFilter方法添加过滤器,这也是分页、多条件查询的基础。
3、全表扫描,即直接扫描整张表中所有行记录(相当于不限定起止RowKey、不加过滤器的scan)
Get:
HTable table = new HTable(hbaseConf, "table001");
Get get = new Get(Bytes.toBytes("row01"));
get.addFamily(Bytes.toBytes("colfamily01"));//也可不加
Result result = table.get(get);
// 从结果中取用户指定的某个key的value
byte[] value = result.getValue(Bytes.toBytes("colfamily01"), Bytes.toBytes("col02"));
System.out.println(" 查询结果:" + Bytes.toString(value));
System.out.println(new String(value));
for(KeyValue kv:result.raw()){
System.out.println("KeyValue---"+kv);
System.out.println("row=>"+new String(kv.getRow()));
System.out.println("family=>"+new String(kv.getFamily(),"utf-8")+": "+new String(kv.getValue(),"utf-8"));
System.out.println("qualifier=>"+new String(kv.getQualifier())+"\n");
}
// 遍历整行结果中的所有kv单元格
CellScanner cellScanner = result.cellScanner();
while (cellScanner.advance()){
Cell cell = cellScanner.current();
byte[] rowArray = cell.getRowArray(); //本kv所属的行键的字节数组
byte[] familyArray = cell.getFamilyArray(); //列族名的字节数组
byte[] qualifierArray = cell.getQualifierArray();//列名的字节数据
byte[] valueArray = cell.getValueArray();// value的字节数组
System.out.println("行键:"+new String(rowArray,cell.getRowOffset(),cell.getRowLength()));
System.out.println("列族:"+new String(familyArray,cell.getFamilyOffset(),cell.getFamilyLength()));
System.out.println("列名:"+new String(qualifierArray,cell.getQualifierOffset(),cell.getQualifierLength()));
System.out.println("value:"+new String(valueArray,cell.getValueOffset(),cell.getValueLength()));
table.close();
conn.close();
//批量查询rowkey
List<Get> gets = new ArrayList<Get>();
if (get != null) {
gets.add(get);
}
Result[] rss = table.get(gets);
for (Result rs : rss) {
List<HBaseRecord> recordList = this.getResult(rs);
String rowKey = Bytes.toString(rs.getRow());
recordsMap.put(rowKey, recordList);
}
Scan:
Table table = connection.getTable(TableName.valueOf("table3"));
Scan scan = new Scan();
//过滤器 某一列的范围 :13<=age<=18(按字节序比较字符串值)
Filter filter1 = new SingleColumnValueFilter("cf".getBytes(), "age".getBytes(), CompareFilter.CompareOp.GREATER_OR_EQUAL, "13".getBytes());
Filter filter2 = new SingleColumnValueFilter("cf".getBytes(), "age".getBytes(), CompareFilter.CompareOp.LESS_OR_EQUAL, "18".getBytes());
FilterList filterList = new FilterList();
filterList.addFilter(filter1);
filterList.addFilter(filter2);
scan.setFilter(filterList);
//扫某一列name列 addColumn(byte[] family, byte[] qualifier)
//scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
//行键范围限定(startRow 包含,stopRow 不包含)
scan.setStartRow(Bytes.toBytes("00001"));
scan.setStopRow(Bytes.toBytes("00009"));
scanRow.addColumn(Bytes.toBytes("xcolfam01"), Bytes.toBytes("xcol02"))
.addColumn(Bytes.toBytes("xcolfam01"), Bytes.toBytes("xcol04"))
.setStartRow(Bytes.toBytes("xrow03"))
.setStopRow(Bytes.toBytes("xrow05"));
ResultScanner resultScanner = table.getScanner(scan);
for (Result result : resultScanner) {
//getValue(byte[] family, byte[] qualifier)
String name = new String(result.getValue("cf".getBytes(), "name".getBytes()));
int age = Integer.valueOf(new String(result.getValue("cf".getBytes(), "age".getBytes())));
String rowKey = new String(result.getRow());
System.out.println(rowKey + "\t" + "cf:name-->" + name + ", cf:age-->" + age);
}
假设test表中存储着如下RowKey:
//12Aabb
//3aabb
//Aabb
//aabb
结果:
//12Aabb
//3aabb
三、Filter
代码参考https://www.cnblogs.com/similarface/p/5805973.html
scan "table_name" ,{FILTER=>"PrefixFilter('1') AND ValueFilter(=,'substring:2015')", STARTROW=>'2',ENDROW=>'3', LIMIT=>10}
一般不建议用Filter(scan.setFilter()):通过filter设置的条件查不到数据时,响应速度非常慢,大概在十几秒,有时会超时
有两个参数类在各类Filter中经常出现,统一介绍下:
(1)比较运算符 CompareFilter.CompareOp
比较运算符用于定义比较关系,可以有以下几类值供选择:
EQUAL 相等
GREATER 大于
GREATER_OR_EQUAL 大于等于
LESS 小于
LESS_OR_EQUAL 小于等于
NOT_EQUAL 不等于
(2)比较器 ByteArrayComparable
通过比较器可以实现多样化目标匹配效果,比较器有以下子类可以使用:
BinaryComparator 匹配完整字节数组
BinaryPrefixComparator 匹配字节数组前缀
BitComparator 按位比较(AND / OR / XOR)
RegexStringComparator 正则表达式匹配
SubstringComparator 子串匹配
HBASE用到的几个filter
1,FilterList
FilterList 代表一个过滤器链,它可以包含一组即将应用于目标数据集的过滤器,过滤器间有
“与” FilterList.Operator.MUST_PASS_ALL
“或” FilterList.Operator.MUST_PASS_ONE 的关系。
FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ONE); //数据只要满足一组过滤器中的一个就可以
SingleColumnValueFilter filter1 = new SingleColumnValueFilter(cf,column,CompareOp.EQUAL,Bytes.toBytes("my value"));
list.add(filter1);
SingleColumnValueFilter filter2 = new SingleColumnValueFilter(cf,column,CompareOp.EQUAL,Bytes.toBytes("my other value"));
list.add(filter2);
Scan scan = new Scan();
scan.setFilter(list);
- 行键过滤器 RowFilter
Filter filter1 = new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("row-22")));
scan.setFilter(filter1);
- 列族过滤器 FamilyFilter
Filter filter1 = new FamilyFilter(CompareFilter.CompareOp.LESS, new BinaryComparator(Bytes.toBytes("colfam3")));
scan.setFilter(filter1);
- 列过滤器 QualifierFilter
Filter filter = new QualifierFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("col-2")));
- 值过滤器 ValueFilter
Filter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator(".4") );
SingleColumnValueFilter - 列值过滤器:按指定列的值进行过滤,还可以配合正则表达式比较器进行查询
过滤某列值大于多少小于多少:
List<Filter> filters = new ArrayList<Filter>();
filters.add( new SingleColumnValueFilter(Bytes.toBytes("cfinfo"), //列族
Bytes.toBytes("column"), //列名
CompareOp.GREATER,Bytes.toBytes("1359901")) ); //值
FilterList filterList1 = new FilterList(Operator.MUST_PASS_ALL,filters);
sn.setFilter(filterList1);
ValueFilter:
不需要指定某个列,含有某个值的列的数据都取出来,混在一起
HTable table = new HTable(config, tableName);
Scan scan = new Scan();
System.out.println("只列出值包含data1的列");
Filter filter1 = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("data1"));
scan.setFilter(filter1);
RowFilter
Rowkey中包括某些字符串,字符串存在list a内
提取rowkey以01结尾数据
Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new RegexStringComparator(".*01$"));
提取rowkey包含201407的数据
Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator("201407"));
提取rowkey以123开头的数据
Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new BinaryPrefixComparator("123".getBytes()));
for (int a=0;a<size;a++){
// Filter filter2 = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".*"+maclist.get(a)+"$"));
Filter filter2 = new RowFilter(CompareOp.EQUAL, new SubstringComparator(maclist.get(a)));
filters.add(filter2);
}
FilterList filterList1 = new FilterList(Operator.MUST_PASS_ONE,filters);
sn.setFilter(filterList1);
PageFilter 和 StartRow 配合使用,指定页面行数,返回对应行数的结果集。
需要注意的是,该过滤器并不能保证返回的结果行数小于等于指定的页面行数,因为过滤器是分别作用到各个region server的,它只能保证当前region返回的结果行数不超过指定页面行数。
(但这种方法效率比较低,且有冗余查询)
Table table = connection.getTable(TableName.valueOf("user"));
PageFilter pf = new PageFilter(2L);
Scan scan = new Scan();
scan.setFilter(pf);
scan.setStartRow(Bytes.toBytes("zhangsan_"));
ResultScanner rs = table.getScanner(scan);
返回的结果实际上有四条,因为这些数据来自不同RegionServer(注意:下面四条输出是四个单元格,分属两个rowkey):
[row:zhangsan_1495527850759],[family:account],[qualifier:idcard],[value:9897645464646],[time:1495556648664]
[row:zhangsan_1495527850759],[family:account],[qualifier:passport],[value:5689879898],[time:1495636370056]
[row:zhangsan_1495527850824],[family:account],[qualifier:country],[value:china],[time:1495636452285]
[row:zhangsan_1495527850824],[family:account],[qualifier:name],[value:zhangsan],[time:1495556648729]
/**
* 获取指定Rowkey正则的资讯列表(分页)
*
* @param pageSize 页大小
* @param lastRowKey 上一页最后的rowkey
* @param rowkeyReg Rowkey正则
* @return 资讯列表
*/
public List<GeneralNewsInfo> getData(int pageSize, String lastRowKey, String rowkeyReg) {
List<GeneralNewsInfo> dataList = new ArrayList<>();
getConnection();
try {
// 二级索引表查询索引数据
Table table = conn.getTable(TableName.valueOf(RcmdHbaseConstants.RCMD_NEWS_TITLE_INDEX));
Scan scan = new Scan();
// 构建模糊查询的Filter和分页的Filter
FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
if (rowkeyReg != null) {
RegexStringComparator regex = new RegexStringComparator(rowkeyReg);
Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, regex);
filterList.addFilter(filter);
}
Filter pageFilter = new PageFilter(pageSize);
Filter rowFilter = new RowFilter(CompareFilter.CompareOp.GREATER,
new BinaryComparator(Bytes.toBytes(lastRowKey)));
filterList.addFilter(pageFilter);
filterList.addFilter(rowFilter);
scan.setFilter(filterList);
ResultScanner rs = table.getScanner(scan);
Result result;
int rowNum = 0;
while ((result = rs.next()) != null) {
if (rowNum >= pageSize) {
break;
}
List<Cell> listCells = result.listCells();
for (Cell cell : listCells) {
String rowkey = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
// 索引拿到rowkey 去元数据表中获取元数据
HBase原生带有PrefixFilter这种对RowKey的前缀过滤查询,但在尝试实现后缀查询的过程中,发现这一方面相对来说还是空白。
因此,只能采用一些策略来实现,主要还是采用正则表达式的方式。
Filter filter2 = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*_2015100112"));
// co RowFilterExample-2-Filter2 Another filter, this time using a regular expression to match the row keys.
scan.setFilter(filter2);
ResultScanner scanner2 = table.getScanner(scan);
// ^^ RowFilterExample
System.out.println("Scanning table #2...");
// vv RowFilterExample
for (Result res : scanner2) {
System.out.println(res);
}