经过几天来的看资料,写代码,终于对这个东东有点眉目了。
package linhon.crud;
import java.util.Date;
import java.util.Map.Entry;
import java.util.NavigableMap;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
/***
* test hbase crud operations
* @author leibnitz
* @create jan,12,11
*/
public class TestHbaseCrud {

    /**
     * Inserts a row when {@code rowkey} does not exist, otherwise updates the given
     * column(s). HBase guarantees these updates are atomic at the row level.
     *
     * @param tableName name of the target table
     * @param rowkey    integer row key (serialized via {@code Bytes.toBytes(int)})
     * @param content   value for column {@code info:content}; the column is skipped when null
     * @param addTime   timestamp source and value for column {@code info:add_time};
     *                  the column is skipped when null
     * @throws Exception on any HBase client error
     */
    public static void add(String tableName, int rowkey, String content, Date addTime) throws Exception {
        HBaseConfiguration hbaseConf = new HBaseConfiguration();
        HTable htable = new HTable(hbaseConf, tableName);
        try {
            htable.setAutoFlush(false);
            htable.setWriteBufferSize(1024 * 5);
            // Fall back to "now" so a null addTime with a non-null content no longer
            // throws a NullPointerException (the original called addTime.getTime()
            // for the content column before the null check on addTime).
            long ts = (addTime != null) ? addTime.getTime() : System.currentTimeMillis();
            Put put = new Put(Bytes.toBytes(rowkey));
            if (content != null) {
                put.add(Bytes.toBytes("info"), Bytes.toBytes("content"), ts, Bytes.toBytes(content));
            }
            if (addTime != null) { // several columns may be added within one Put
                put.add(Bytes.toBytes("info"), Bytes.toBytes("add_time"), ts, Bytes.toBytes(ts));
            }
            htable.put(put);
            htable.flushCommits();
        } finally {
            htable.close(); // close() also invokes flushCommits()
        }
    }

    /**
     * Adds a column (qualifier) with an empty value to the specified row only;
     * other rows will not carry this column.
     *
     * @param tableName name of the target table
     * @param rowkey    integer row key identifying the row to extend
     * @param family    column family the new qualifier belongs to
     * @param column    qualifier name to add
     * @throws Exception on any HBase client error
     */
    public static void addColumnOnly(String tableName, int rowkey, String family, String column) throws Exception {
        HBaseConfiguration hbaseConf = new HBaseConfiguration();
        HTable htable = new HTable(hbaseConf, tableName);
        try {
            htable.setAutoFlush(false);
            htable.setWriteBufferSize(1024 * 5);
            Put put = new Put(Bytes.toBytes(rowkey));
            put.add(Bytes.toBytes(family), Bytes.toBytes(column), Bytes.toBytes(""));
            htable.put(put);
            htable.flushCommits();
        } finally {
            htable.close(); // close() also invokes flushCommits()
        }
    }

    /**
     * Scans the WHOLE table (the {@code rowKey} parameter does not restrict the
     * scan; it is only used by the per-column exists(Get) probe below) and prints
     * the requested columns of every row. Columns whose name contains "time" or
     * "date" are decoded as long timestamps, all others as strings.
     * NOTE: rst.list() could be used instead to show all columns of a row.
     *
     * @param tblName table to scan
     * @param rowKey  row key used by the exists(Get) timestamp probe
     * @param family  column family holding the requested columns
     * @param columns qualifiers to print for each row
     * @throws Exception on any HBase client error
     */
    public static void query(String tblName, int rowKey, String family, String... columns) throws Exception {
        HBaseConfiguration hconf = new HBaseConfiguration();
        HTable htbl = new HTable(hconf, tblName);
        ResultScanner scanner = null;
        try {
            scanner = htbl.getScanner(new Scan()); // add a filter parameter if necessary
            byte[] fam = Bytes.toBytes(family);    // loop-invariant, hoisted out of the loops
            Result rst;
            while ((rst = scanner.next()) != null) { // iterate row by row
                System.out.println("row:" + Bytes.toInt(rst.getRow()));
                for (String col : columns) {
                    byte[] qual = Bytes.toBytes(col);
                    if (col.contains("time") || col.contains("date")) {
                        System.out.printf(" %s:%2$tF %2$tH:%2$tM:%2$tS ",
                                col, Bytes.toLong(rst.getValue(fam, qual)));
                    } else {
                        System.out.printf(" %s:%s ", col, Bytes.toString(rst.getValue(fam, qual)));
                    }
                    // Probe whether a cell version exists at one exact timestamp.
                    // setTimeStamp(ts) means the time range [ts, ts + 1): the
                    // new-style exists(Get) can target a single version instead of
                    // the old 0..ts range of exists(row, column, timestamp).
                    long ts = 1295977940837L; // sample timestamp captured from an earlier run
                    Get g = new Get(Bytes.toBytes(rowKey));
                    g.addColumn(fam, qual);
                    g.setTimeStamp(ts);
                    boolean b = htbl.exists(g);
                    System.out.println(" has versions:" + ts + "," + b);
                }
            }
        } finally {
            if (scanner != null) {
                scanner.close();
            }
            htbl.close();
        }
    }

    /**
     * Retrieves up to {@code maxVersions} versions per cell and prints them grouped
     * by family and qualifier; versions are returned newest first. Because the test
     * table keeps only two versions, asking for more still prints at most two.
     *
     * @param tblName     table to read
     * @param rowKey      integer row key of the row to fetch
     * @param family      column family to read
     * @param maxVersions maximum number of cell versions to return
     * @param columns     qualifiers to fetch; when null or empty the whole family is fetched
     * @throws Exception on any HBase client error
     */
    public static void queryByMaxVersions(String tblName, int rowKey, String family, int maxVersions,
            String... columns) throws Exception {
        HBaseConfiguration hconf = new HBaseConfiguration();
        HTable htbl = new HTable(hconf, tblName);
        try {
            Get g = new Get(Bytes.toBytes(rowKey));
            if (columns == null || columns.length == 0) {
                // single-argument addColumn takes the old "family:column" format;
                // passing the bare family name fetches the whole family
                g.addColumn(Bytes.toBytes(family));
            } else {
                for (String col : columns) {
                    g.addColumn(Bytes.toBytes(family), Bytes.toBytes(col));
                }
            }
            g.setMaxVersions(maxVersions);
            Result rst = htbl.get(g);
            // getMap(): family -> (qualifier -> (timestamp -> value)), newest timestamp first
            for (Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> famEntry
                    : rst.getMap().entrySet()) {
                System.out.println("family: " + Bytes.toString(famEntry.getKey()));
                for (Entry<byte[], NavigableMap<Long, byte[]>> colEntry : famEntry.getValue().entrySet()) {
                    String col = Bytes.toString(colEntry.getKey());
                    System.out.println(" qualifier: " + col);
                    for (Entry<Long, byte[]> verEntry : colEntry.getValue().entrySet()) {
                        if (col.contains("time") || col.contains("date")) {
                            System.out.println(" version: " + verEntry.getKey() +
                                    ",value:" + Bytes.toLong(verEntry.getValue()));
                        } else {
                            System.out.println(" version: " + verEntry.getKey() +
                                    ",value:" + Bytes.toString(verEntry.getValue()));
                        }
                    }
                }
            }
            // With all columns and maxVersions >= 2 the output looks like:
            //   family: info
            //    qualifier: add_time
            //     version: 1295977940837,value:1295977940837   (newest version first)
            //     version: 1295977489609,value:1295977488769   (older version)
            //    qualifier: content
            //     version: 1295977940837,value:linhon
            //     version: 1295976774855,value:bye,linhon
        } finally {
            htbl.close();
        }
    }

    /** Modification uses the same code path as insertion; see {@link #add}. */
    public static void modify() {
    }

    /**
     * Deletes one cell version of a column. Deletes may generally be scoped by:
     * 1. family, or family + column
     * 2. timestamp range
     * 3. regexp
     *
     * @param tblName   table to delete from
     * @param rowKey    integer row key of the target row
     * @param family    column family of the cell
     * @param column    qualifier of the cell
     * @param timestamp exact version to delete
     * @throws Exception on any HBase client error
     */
    public static void deleteColumnData(String tblName, int rowKey, String family, String column,
            long timestamp) throws Exception {
        HBaseConfiguration hconf = new HBaseConfiguration();
        HTable htbl = new HTable(hconf, tblName);
        try {
            Delete dlt = new Delete(Bytes.toBytes(rowKey));
            dlt.deleteColumn(Bytes.toBytes(family), Bytes.toBytes(column), timestamp);
            htbl.delete(dlt);
            htbl.flushCommits();
        } finally {
            htbl.close();
        }
    }

    /**
     * Drops a whole column family (a schema change, done through HBaseAdmin, as
     * opposed to HTable which manipulates data). The table must be disabled first;
     * individual qualifiers cannot be removed, only the family as a unit.
     * NOTE(review): the table is left disabled afterwards; call enableTable if it
     * should remain usable.
     *
     * @param tblName table whose family is dropped
     * @param family  family to drop; deleteColumn accepts any "family" or
     *                "family:qualifier" combination — the qualifier part is ignored
     * @param column  unused; kept for backward-compatible signature
     * @throws Exception on any HBase admin error
     */
    public static void deleteColumnFamily(String tblName, String family, String column) throws Exception {
        HBaseConfiguration hconf = new HBaseConfiguration();
        HBaseAdmin admin = new HBaseAdmin(hconf);
        // disabling the table is mandatory before altering its schema
        if (admin.isTableEnabled(tblName)) {
            admin.disableTable(tblName);
        }
        admin.deleteColumn(tblName, family /* + ":" + column */);
        // admin.enableTable(tblName);
        admin.flush(tblName);
    }

    /**
     * Entry point exercising the operations above; uncomment individual calls to
     * replay the other scenarios.
     */
    public static void main(String[] args) throws Exception {
        // add("test_user", 1, "linhon", new Date());
        // add("test_user", 1, "hello,linhon", new Date());
        // add("test_user", 1, "bye,linhon", new Date());
        // add("test_user", 1, null, new Date());
        // System.out.println(System.currentTimeMillis());
        // query("test_user", 1, "info", "content", "add_time");
        // queryByMaxVersions("test_user", 1, "info", 3, "content", "add_time");
        // addColumnOnly("test_user", 1, "info", "age");
        // deleteColumnData("test_user", 1, "info", "age", 1296030610746L);
        // deleteColumnFamily("test_user", "info", "age");
        addColumnOnly("test_user2", 1, "num", "age");
        // deleteColumnFamily("test_user2", "num", "age"); // test table
    }
}
我觉得既然它有横向切分(书上是这样说的,但我没有在真正的分布式环境跑过,只在伪分布模式下试过,所以不确定是否正确)、非结构化
存储、支持版本化,那么就不应该只是进行简单的CRUD的普通表式的操作,所以我挖掘了一些新功能点出来。
注意问题:
1.旧版本的:exists(final byte [] row, final byte [] column,long timestamp),其中的timestamp代表是从0开始到timestamp
的time range;新版本的exists(Get)可以指定一个具体的timestamp范围而不是使用从0开始的范围。
hbase(main):014:0> scan 'test_user'
ROW COLUMN+CELL
\x00\x00\x00\x01 column=info:add_time, timestamp=1294813460625, value=\x00\x00\x01-x\xE5uw
\x00\x00\x00\x01 column=info:content, timestamp=1295976774855, value=bye,linhon
2.put或get中的addColumn(column)如果只有一个参数,代表这是old format column,that means the form is:<family:column>
3.Htable是对表数据的修改查询操作;HBaseAdmin是对表结构操作;
4.在shell下进行的scan操作,各cell只输出最后一个version的value
5.添加数据时,row key是必须指定的。
6.在已有数据情况下添加新column,HTable中需要指定一个rowkey,代表只添加到些行上,其它行是没有这列数据的。
7.deleteColumn(tbl,col)使用family+":"+column作为col时删除全部列(family)
8.hbase无法做到动态增加/删除列族(要先disable);删除只能删除列族,不能单独删除column成员