HBase version: 1.3.1
Goal: show how to use the newer HBase Java client API.
The following Java API usage patterns were tried and verified:
1. Create a table
2. Create a table (pre-split regions)
3. Single put
4. Batch put
5. Batch put (client-side write buffer)
6. Single get
7. Batch get
8. Simple scan
9. Mixed operations
■ Sample code
■ pom.xml
<project>
    <groupId>hbase_sample</groupId>
    <artifactId>hbase_sample</artifactId>
    <version>1.0</version>
    <modelVersion>4.0.0</modelVersion>
    <dependencies>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.3.1</version>
        </dependency>
    </dependencies>
    <build>
        <sourceDirectory>src/main/java</sourceDirectory>
        <outputDirectory>target/classes</outputDirectory>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration><source>1.7</source><target>1.7</target></configuration>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.4</version>
                <configuration>
                    <descriptorRefs><descriptorRef>jar-with-dependencies</descriptorRef></descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals><goal>single</goal></goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
===1. Create a Table===
package api;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;

public class create_table_sample1 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        Connection connection = ConnectionFactory.createConnection(conf);
        Admin admin = connection.getAdmin();

        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("TEST1"));
        desc.setMemStoreFlushSize(2097152L);     // MemStore flush size; default 128M, must not be below 1M
        desc.setMaxFileSize(10485760L);          // maximum HFile size; default 10G, must not be below 2M
        desc.setDurability(Durability.SYNC_WAL); // whether the WAL is written synchronously or asynchronously

        HColumnDescriptor family1 = new HColumnDescriptor(constants.COLUMN_FAMILY_DF.getBytes());
        family1.setTimeToLive(2 * 60 * 60 * 24); // TTL, in seconds
        family1.setMaxVersions(2);               // number of versions
        family1.setBlockCacheEnabled(true);
        desc.addFamily(family1);

        HColumnDescriptor family2 = new HColumnDescriptor(constants.COLUMN_FAMILY_EX.getBytes());
        family2.setTimeToLive(3 * 60 * 60 * 24);   // TTL, in seconds
        family2.setMinVersions(2);                 // minimum number of versions, default 0
        family2.setMaxVersions(3);                 // maximum number of versions, default 1
        family2.setBloomFilterType(BloomType.ROW); // bloom filter: ROW or ROWCOL; ROWCOL also filters on the column; default ROW
        family2.setBlocksize(65536);               // block size in bytes, default 65536
        family2.setBlockCacheEnabled(true);        // block cache holding each HFile block's start key; default true
        //family2.setCacheBloomsOnWrite(false);    // cache bloom filters on write; default false
        //family2.setCacheIndexesOnWrite(false);   // cache index blocks on write; default false
        //family2.setCompressionType(Compression.Algorithm.NONE);           // compression for store files; default NONE
        //family2.setCompactionCompressionType(Compression.Algorithm.NONE); // compression during compaction; default NONE
        //family2.setDataBlockEncoding(DataBlockEncoding.NONE);             // data block encoding in memory and on disk (distinct from Snappy compression); default NONE
        //family2.setEvictBlocksOnClose(false);    // evict cached blocks when the store files are closed; default false
        //family2.setInMemory(false);              // give this family higher priority in the LRU block cache; default false
        //family2.setScope(HConstants.REPLICATION_SCOPE_GLOBAL); // REPLICATION_SCOPE_LOCAL disables cross-cluster replication; default 0 (local)
        desc.addFamily(family2);

        admin.createTable(desc);
        admin.close();
        connection.close();
    }
}
===2. Create a Table (Pre-Split Regions)===
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;

public class create_table_sample2 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        Connection connection = ConnectionFactory.createConnection(conf);
        Admin admin = connection.getAdmin();

        TableName table_name = TableName.valueOf("TEST1");
        if (admin.tableExists(table_name)) {
            admin.disableTable(table_name);
            admin.deleteTable(table_name);
        }

        HTableDescriptor desc = new HTableDescriptor(table_name);
        HColumnDescriptor family1 = new HColumnDescriptor(constants.COLUMN_FAMILY_DF.getBytes());
        family1.setTimeToLive(3 * 60 * 60 * 24);   // TTL, in seconds
        family1.setBloomFilterType(BloomType.ROW); // row-level bloom filter
        family1.setMaxVersions(3);                 // number of versions
        desc.addFamily(family1);
        HColumnDescriptor family2 = new HColumnDescriptor(constants.COLUMN_FAMILY_EX.getBytes());
        family2.setTimeToLive(2 * 60 * 60 * 24);   // TTL, in seconds
        family2.setBloomFilterType(BloomType.ROW); // row-level bloom filter
        family2.setMaxVersions(2);                 // number of versions
        desc.addFamily(family2);

        byte[][] splitKeys = {
                Bytes.toBytes("row01"),
                Bytes.toBytes("row02"),
                Bytes.toBytes("row04"),
                Bytes.toBytes("row06"),
                Bytes.toBytes("row08"),
        };
        admin.createTable(desc, splitKeys);

        admin.close();
        connection.close();
    }
}
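If the row keys are evenly distributed, the split points do not have to be enumerated by hand: Admin also offers a createTable overload that divides the key range between a start key and an end key into a given number of regions. A minimal sketch, reusing desc and admin from above; the key range and region count are only illustrative:

        // Pre-split into 6 regions with boundaries spaced evenly between "row00" and "row99".
        admin.createTable(desc, Bytes.toBytes("row00"), Bytes.toBytes("row99"), 6);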
===3. Single Put===
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.Random;

public class table_put_sample1 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf(constants.TABLE_NAME));

        Random random = new Random();
        String[] rows = new String[] {"01", "02", "03"};
        String[] names = new String[] {"zhang san", "li si", "wang wu", "wei liu"};
        String[] sexs = new String[] {"men", "women"};
        String[] heights = new String[] {"165cm", "170cm", "175cm", "180cm"};
        String[] weights = new String[] {"50kg", "55kg", "60kg", "65kg", "70kg", "75kg", "80kg"};

        Put put = new Put(Bytes.toBytes("row" + rows[random.nextInt(rows.length)]));
        String name = names[random.nextInt(names.length)];
        put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes(), name.getBytes());
        String sex = sexs[random.nextInt(sexs.length)];
        put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "sex".getBytes(), sex.getBytes());
        String height = heights[random.nextInt(heights.length)];
        put.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "height".getBytes(), height.getBytes());
        String weight = weights[random.nextInt(weights.length)];
        put.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes(), weight.getBytes());
        table.put(put);

        table.close();
        connection.close();
    }
}
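Table also provides checkAndPut for an atomic compare-and-set style write: the put is applied only when the current value of one column matches the expected value. A minimal sketch, reusing table and constants from above; the row key and values are only illustrative:

        Put put = new Put(Bytes.toBytes("row01"));
        put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes(), "li si".getBytes());
        // Apply the put only if df:name currently equals "zhang san"; returns true when the put was applied.
        boolean applied = table.checkAndPut(Bytes.toBytes("row01"),
                constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes(),
                "zhang san".getBytes(), put);
        System.out.print("[------]applied=" + applied + "\n");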
===4. Batch Put===
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public class table_put_sample2 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf(constants.TABLE_NAME));

        Random random = new Random();
        String[] rows = new String[] {"01", "02", "03"};
        String[] names = new String[] {"zhang san", "li si", "wang wu", "wei liu"};
        String[] sexs = new String[] {"men", "women"};
        String[] heights = new String[] {"165cm", "170cm", "175cm", "180cm"};
        String[] weights = new String[] {"50kg", "55kg", "60kg", "65kg", "70kg", "75kg", "80kg"};

        List<Put> puts = new ArrayList<>();
        for (String row : rows) {
            Put put = new Put(Bytes.toBytes("row" + row));
            String name = names[random.nextInt(names.length)];
            put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes(), name.getBytes());
            String sex = sexs[random.nextInt(sexs.length)];
            put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "sex".getBytes(), sex.getBytes());
            String height = heights[random.nextInt(heights.length)];
            put.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "height".getBytes(), height.getBytes());
            String weight = weights[random.nextInt(weights.length)];
            put.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes(), weight.getBytes());
            puts.add(put);
        }
        table.put(puts);

        table.close();
        connection.close();
    }
}
===5. Batch Put (Client-Side Write Buffer)===
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public class table_put_sample4 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        conf.set("hbase.client.write.buffer", "1048576"); // 1 MB client write buffer
        Connection connection = ConnectionFactory.createConnection(conf);
        BufferedMutator table = connection.getBufferedMutator(TableName.valueOf(constants.TABLE_NAME));
        System.out.print("[--------]write buffer size = " + table.getWriteBufferSize());

        Random random = new Random();
        String[] rows = new String[] {"01", "02", "03", "04", "05"};
        String[] names = new String[] {"zhang san", "li si", "wang wu", "wei liu"};
        String[] sexs = new String[] {"men", "women"};
        String[] heights = new String[] {"165cm", "170cm", "175cm", "180cm"};
        String[] weights = new String[] {"50kg", "55kg", "60kg", "65kg", "70kg", "75kg", "80kg"};

        List<Mutation> batch = new ArrayList<>();
        for (String row : rows) {
            Put put = new Put(Bytes.toBytes("row" + row));
            String name = names[random.nextInt(names.length)];
            put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes(), name.getBytes());
            String sex = sexs[random.nextInt(sexs.length)];
            put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "sex".getBytes(), sex.getBytes());
            String height = heights[random.nextInt(heights.length)];
            put.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "height".getBytes(), height.getBytes());
            String weight = weights[random.nextInt(weights.length)];
            put.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes(), weight.getBytes());
            batch.add(put);
        }
        table.mutate(batch);
        table.flush();

        table.close();
        connection.close();
    }
}
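Instead of the global hbase.client.write.buffer setting, the buffer size can also be set per mutator through BufferedMutatorParams, together with an exception listener that is called for mutations that still fail after all retries. A minimal sketch of the relevant part, reusing connection and constants from above; the listener body is only illustrative:

        BufferedMutator.ExceptionListener listener = new BufferedMutator.ExceptionListener() {
            @Override
            public void onException(RetriesExhaustedWithDetailsException e, BufferedMutator mutator) {
                // Called asynchronously when buffered mutations could not be written.
                for (int i = 0; i < e.getNumExceptions(); i++) {
                    System.out.print("[--------]failed to write row " + e.getRow(i) + "\n");
                }
            }
        };
        BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf(constants.TABLE_NAME))
                .writeBufferSize(1048576L)   // 1 MB, for this mutator only
                .listener(listener);
        BufferedMutator table = connection.getBufferedMutator(params);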
===6. Single Get===
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

public class table_get_sample3 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf(constants.TABLE_NAME));

        Get get = new Get(("row01").getBytes());
        get.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes());
        get.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "sex".getBytes());
        get.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "height".getBytes());
        get.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes());

        Result result = table.get(get);
        byte[] name = result.getValue(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes());
        byte[] sex = result.getValue(constants.COLUMN_FAMILY_DF.getBytes(), "sex".getBytes());
        byte[] height = result.getValue(constants.COLUMN_FAMILY_EX.getBytes(), "height".getBytes());
        byte[] weight = result.getValue(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes());
        System.out.print("[------]name=" + new String(name) + "\n");
        System.out.print("[------]sex=" + new String(sex) + "\n");
        System.out.print("[------]height=" + new String(height) + "\n");
        System.out.print("[------]weight=" + new String(weight) + "\n");

        table.close();
        connection.close();
    }
}
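When the qualifiers are not known in advance, the returned Result can also be walked cell by cell instead of calling getValue per column. A minimal sketch, reusing result from above (Cell and CellUtil come from the org.apache.hadoop.hbase package):

        for (Cell cell : result.rawCells()) {
            String family = new String(CellUtil.cloneFamily(cell));
            String qualifier = new String(CellUtil.cloneQualifier(cell));
            String value = new String(CellUtil.cloneValue(cell));
            System.out.print("[------]" + family + ":" + qualifier + "=" + value + "\n");
        }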
===7. Batch Get===
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

import java.util.ArrayList;
import java.util.List;

public class table_get_sample4 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf(constants.TABLE_NAME));

        List<Get> gets = new ArrayList<>();
        Get get1 = new Get(("row01").getBytes());
        get1.addFamily(constants.COLUMN_FAMILY_DF.getBytes());
        get1.addFamily(constants.COLUMN_FAMILY_EX.getBytes());
        gets.add(get1);
        Get get2 = new Get(("row02").getBytes());
        get2.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes());
        get2.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "sex".getBytes());
        get2.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "height".getBytes());
        get2.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes());
        gets.add(get2);

        Result[] results = table.get(gets);
        for (Result result : results) {
            byte[] name = result.getValue(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes());
            byte[] sex = result.getValue(constants.COLUMN_FAMILY_DF.getBytes(), "sex".getBytes());
            byte[] height = result.getValue(constants.COLUMN_FAMILY_EX.getBytes(), "height".getBytes());
            byte[] weight = result.getValue(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes());
            System.out.print("[------]name=" + new String(name) + "\n");
            System.out.print("[------]sex=" + new String(sex) + "\n");
            System.out.print("[------]height=" + new String(height) + "\n");
            System.out.print("[------]weight=" + new String(weight) + "\n");
        }

        table.close();
        connection.close();
    }
}
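If only the presence of the rows matters, Table.existsAll answers the same batch of Gets without transferring any cell data. A minimal sketch, reusing table and gets from above:

        // One boolean per Get, true when the requested row/columns exist; no values are returned.
        boolean[] exists = table.existsAll(gets);
        for (int i = 0; i < exists.length; i++) {
            System.out.print("[------]get" + i + " exists=" + exists[i] + "\n");
        }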
===8. Simple Scan===
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

public class table_scan_sample3 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf(constants.TABLE_NAME));

        Scan scan = new Scan();
        scan.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes());
        scan.addFamily(constants.COLUMN_FAMILY_EX.getBytes());

        ResultScanner rs = table.getScanner(scan);
        for (Result r = rs.next(); r != null; r = rs.next()) {
            byte[] row_key = r.getRow();
            System.out.print("[------]row_key=" + new String(row_key) + "\n");
            byte[] name = r.getValue(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes());
            System.out.print("[------]name=" + new String(name) + "\n");
            byte[] weight = r.getValue(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes());
            System.out.print("[------]weight=" + new String(weight) + "\n");
        }

        table.close();
        connection.close();
    }
}
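A Scan covers the whole table by default. It can be restricted to a row-key range, and scanner caching reduces the number of RPCs for large result sets. A minimal sketch, reusing table and constants from above; the range and caching value are only illustrative:

        Scan scan = new Scan();
        scan.setStartRow("row02".getBytes());   // inclusive
        scan.setStopRow("row06".getBytes());    // exclusive
        scan.setCaching(100);                   // rows fetched per RPC
        scan.addFamily(constants.COLUMN_FAMILY_DF.getBytes());
        ResultScanner rs = table.getScanner(scan);
        try {
            for (Result r : rs) {
                System.out.print("[------]row_key=" + new String(r.getRow()) + "\n");
            }
        } finally {
            rs.close();
        }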
===9. Mixed Operations===
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

import java.util.ArrayList;
import java.util.List;

public class table_batch_sample2 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.80,192.168.1.81,192.168.1.82");
        conf.set("hbase.client.write.buffer", "1048576"); // 1 MB
        Connection connection = ConnectionFactory.createConnection(conf);
        BufferedMutator mutator = connection.getBufferedMutator(TableName.valueOf(constants.TABLE_NAME));

        // "random" is a small helper class of this project that generates test row keys and values.
        List<Mutation> batch = new ArrayList<>();
        byte[] row_key = random.getRowKey();
        Put put = new Put(row_key);
        put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes(), random.getName());
        put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "sex".getBytes(), random.getSex());
        put.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "height".getBytes(), random.getHeight());
        put.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes(), random.getWeight());
        batch.add(put);
        Delete delete = new Delete(row_key);
        delete.addFamily(constants.COLUMN_FAMILY_DF.getBytes());
        delete.addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes());
        batch.add(delete);
        mutator.mutate(batch);

        Table table = connection.getTable(TableName.valueOf(constants.TABLE_NAME));
        Get get = new Get(row_key);

        // Before flush() the mutations may still sit in the client-side buffer, so this read can return the old values.
        Result result1 = table.get(get);
        System.out.print("[------]name=" + getValue(result1, constants.COLUMN_FAMILY_DF, "name") + "\n");
        System.out.print("[------]sex=" + getValue(result1, constants.COLUMN_FAMILY_DF, "sex") + "\n");
        System.out.print("[------]height=" + getValue(result1, constants.COLUMN_FAMILY_EX, "height") + "\n");
        System.out.print("[------]weight=" + getValue(result1, constants.COLUMN_FAMILY_EX, "weight") + "\n");

        mutator.flush();

        // After flush() the put and the delete have been sent to the server.
        Result result2 = table.get(get);
        System.out.print("[------]name=" + getValue(result2, constants.COLUMN_FAMILY_DF, "name") + "\n");
        System.out.print("[------]sex=" + getValue(result2, constants.COLUMN_FAMILY_DF, "sex") + "\n");
        System.out.print("[------]height=" + getValue(result2, constants.COLUMN_FAMILY_EX, "height") + "\n");
        System.out.print("[------]weight=" + getValue(result2, constants.COLUMN_FAMILY_EX, "weight") + "\n");

        table.close();
        mutator.close();
        connection.close();
    }

    private static String getValue(Result rs, String family, String column) {
        byte[] value = rs.getValue(family.getBytes(), column.getBytes());
        if (value == null) {
            return "";
        } else {
            return new String(value);
        }
    }
}
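Mixed operations can also be sent through Table.batch, which submits Puts, Deletes and Gets in one round trip and reports the outcome of each action, instead of going through a BufferedMutator. A minimal sketch, reusing table, constants and row_key from above; the values are only illustrative:

        List<Row> actions = new ArrayList<>();
        actions.add(new Put(row_key)
                .addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes(), "zhang san".getBytes()));
        actions.add(new Delete(row_key)
                .addColumn(constants.COLUMN_FAMILY_EX.getBytes(), "weight".getBytes()));
        actions.add(new Get(row_key));
        Object[] results = new Object[actions.size()];
        // Each slot holds the per-action outcome: a Result for the Get, an (empty) Result for successful writes.
        table.batch(actions, results);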
===Supplement===
1) HTableDescriptor features
Table-level settings can be configured through the HTableDescriptor object, for example:

// Whether the WAL is written synchronously or asynchronously.
tb.setDurability(Durability.SYNC_WAL);
// Region size: once the largest store file in a region reaches this size, the region starts to split.
tb.setMaxFileSize(1024*1024*1024);
// MemStore size: when the memstore reaches this value, it is flushed to disk.
tb.setMemStoreFlushSize(256*1024*1024);

With a MemStore configured, HBase writes data to memory first and only flushes it to disk once the in-memory data reaches the threshold.
So if the RegionServer goes down before the data has been flushed, the data still held in memory is lost.
This can be guarded against by setting the WAL level, i.e. tb.setDurability(Durability.SYNC_WAL);
setDurability(Durability d) is available on three classes: HTableDescriptor, Delete, and Put.
Delete and Put both inherit the method from their parent class org.apache.hadoop.hbase.client.Mutation.
They set the WAL write level for the table, for put operations, and for delete operations respectively, as sketched below.
Note that Delete and Put do not inherit the table's Durability setting (verified in practice).
Durability is an enum; if no WAL level is set through this method, the default is USE_DEFAULT.
USE_DEFAULT  // use the cluster-wide default WAL level, i.e. SYNC_WAL
ASYNC_WAL    // write the WAL asynchronously when data changes
SYNC_WAL     // write the WAL synchronously when data changes
FSYNC_WAL    // write the WAL synchronously when data changes, and force it to disk
SKIP_WAL     // do not write the WAL
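For example, a bulk write that can tolerate losing the last few mutations might relax the WAL level per Put while deletes stay synchronous. A minimal sketch, assuming the table and constants from the samples above:

Put put = new Put(Bytes.toBytes("row01"));
put.addColumn(constants.COLUMN_FAMILY_DF.getBytes(), "name".getBytes(), "zhang san".getBytes());
put.setDurability(Durability.ASYNC_WAL);   // this Put only: WAL written asynchronously

Delete delete = new Delete(Bytes.toBytes("row02"));
delete.setDurability(Durability.SYNC_WAL); // this Delete: WAL written synchronously

table.put(put);
table.delete(delete);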
2) HColumnDescriptor features
Column family settings can be configured through the HColumnDescriptor object, for example (a sketch of applying them to an existing table follows this list):

// Data block encoding for data in memory and in store files (distinct from compression such as Snappy); default NONE.
tb.setDataBlockEncoding(DataBlockEncoding.PREFIX);
// Bloom filter: NONE, ROW (default) or ROWCOL; ROWCOL also filters on the column, not just the row.
tb.setBloomFilterType(BloomType.ROW);
// Replication scope: REPLICATION_SCOPE_LOCAL (default) means the family is not replicated between clusters.
tb.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
// Maximum number of versions to keep; default 1.
tb.setMaxVersions(3);
// Minimum number of versions to keep; default 0, used together with TTL.
tb.setMinVersions(1);
// Maximum time to keep data (TTL), in seconds.
tb.setTimeToLive(18000);
// Compression type for store files; default NONE (no compression).
tb.setCompressionType(Algorithm.SNAPPY);
// Whether to keep cells that have already been deleted.
tb.setKeepDeletedCells(false);
// Keep this family's data in memory to speed up reads.
tb.setInMemory(true);
// Block cache, which holds the start key of every HFile block; default true.
tb.setBlockCacheEnabled(true);
// Block size; default 65536 bytes.
tb.setBlocksize(64*1024);
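These column family settings can also be changed after the table has been created; in the 1.x Admin API this goes through modifyColumn, which replaces the family's descriptor. A minimal sketch, assuming the TEST1 table, the connection, and the constants class from the samples above:

Admin admin = connection.getAdmin();
HColumnDescriptor family = new HColumnDescriptor(constants.COLUMN_FAMILY_DF.getBytes());
family.setMaxVersions(3);
family.setTimeToLive(18000);   // seconds
// Replace the existing descriptor of this column family with the new settings.
admin.modifyColumn(TableName.valueOf("TEST1"), family);
admin.close();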
--END--