一、 概述
关于 HBase 的工作原理,网上已经有很多详细的介绍,这里不再赘述,我们直接研究代码。
本文使用的是 HBase 当前最新的 stable 版本。该版本内部引入了部分 hadoop 2.5 系列的 jar 包,这里暂且不去理会;但 Java 环境必须是 8 及以上。我验证通过的运行环境为:
Centos-6.5
hadoop-2.6.5
Hbase-1.2.4
jdk1.8
二、代码设计
package com.unisk.bigdata.hbase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Minimal HBase 1.x client example: creates a table, inserts one row and
 * (optionally) walks through a series of schema changes.
 *
 * <p>Every method opens its own {@link Connection} and closes it before
 * returning, so the methods can be called independently of each other.
 */
public class Hbase {

    /** Name of the demo table. */
    private static final String TABLE_NAME = "PEOPLE";

    /** Name of the single column family the demo table is created with. */
    private static final String COLUMN_FAMILY = "baseINFO";

    /**
     * Creates the given table, dropping any existing table of the same name first.
     *
     * @param admin administrative handle used to run the DDL operations
     * @param table descriptor of the table to (re)create
     * @throws IOException if any of the admin operations fails
     */
    public static void createOrOverwrite(Admin admin, HTableDescriptor table) throws IOException {
        TableName name = table.getTableName();
        if (admin.tableExists(name)) {
            // A table must be disabled before it can be deleted; disabling a
            // table that is already disabled fails, so check first.
            if (admin.isTableEnabled(name)) {
                admin.disableTable(name);
            }
            admin.deleteTable(name);
        }
        admin.createTable(table);
    }

    /**
     * Creates (or recreates) the demo table with one uncompressed column family.
     *
     * @param config HBase client configuration
     * @throws IOException if the connection or the table creation fails
     */
    public static void createSchemaTables(Configuration config) throws IOException {
        try (Connection connection = ConnectionFactory.createConnection(config);
             Admin admin = connection.getAdmin()) {
            // Build a fresh descriptor: the table may not exist yet, so it
            // cannot be looked up through the admin API.
            HTableDescriptor table = new HTableDescriptor(TableName.valueOf(TABLE_NAME));
            table.addFamily(new HColumnDescriptor(COLUMN_FAMILY).setCompressionType(Algorithm.NONE));
            System.out.println("Creating table......");
            createOrOverwrite(admin, table);
            System.out.println("Done.");
        }
    }

    /**
     * Inserts one sample row (row key {@code 0001}) with a {@code name} and an
     * {@code age} column into the demo table.
     *
     * @param conf HBase client configuration
     * @throws IOException if the connection or the write fails
     */
    public static void insert(Configuration conf) throws IOException {
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf(TABLE_NAME))) {
            byte[] family = Bytes.toBytes(COLUMN_FAMILY);

            Put put = new Put(Bytes.toBytes("0001"));
            put.addColumn(family, Bytes.toBytes("name"), Bytes.toBytes("wangxiaoming"));
            put.addColumn(family, Bytes.toBytes("age"), Bytes.toBytes("21"));

            // Puts are sent as a list so that more rows can be batched into
            // the same call later on.
            List<Put> puts = new ArrayList<Put>();
            puts.add(put);
            table.put(puts);
            System.out.println("insert success!");
        }
    }

    /**
     * Demonstrates schema changes on the demo table: adds a column family,
     * modifies the existing one, then disables the table, deletes the original
     * column family and finally deletes the table itself.
     *
     * <p>If the table does not exist, a message is printed and the JVM is
     * terminated with exit status {@code -1}.
     *
     * @param conf HBase client configuration
     * @throws IOException if the connection or any admin operation fails
     */
    public static void modifySchema(Configuration conf) throws IOException {
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf(TABLE_NAME);
            if (!admin.tableExists(tableName)) {
                System.out.println("Table does not exist.");
                System.exit(-1);
            }

            // Update existing table: add a new column family.
            HColumnDescriptor newColumn = new HColumnDescriptor("NEWCF");
            newColumn.setCompactionCompressionType(Algorithm.GZ);
            newColumn.setMaxVersions(HConstants.ALL_VERSIONS);
            admin.addColumn(tableName, newColumn);

            // Read the descriptor only after the family has been added, so the
            // schema passed to modifyTable below is not a stale copy that
            // lacks NEWCF.
            HTableDescriptor table = admin.getTableDescriptor(tableName);

            // Update existing column family.
            HColumnDescriptor existingColumn = new HColumnDescriptor(COLUMN_FAMILY);
            existingColumn.setCompactionCompressionType(Algorithm.GZ);
            existingColumn.setMaxVersions(HConstants.ALL_VERSIONS);
            table.modifyFamily(existingColumn);
            admin.modifyTable(tableName, table);

            // Disable an existing table.
            admin.disableTable(tableName);

            // Delete an existing column family.
            admin.deleteColumn(tableName, Bytes.toBytes(COLUMN_FAMILY));

            // Delete a table (it has to be disabled first).
            admin.deleteTable(tableName);
        }
    }

    /**
     * Entry point: loads the cluster configuration, creates the demo table and
     * inserts one row.
     *
     * @param args unused
     * @throws IOException if any HBase operation fails
     */
    public static void main(String... args) throws IOException {
        Configuration config = HBaseConfiguration.create();
        // Add any necessary configuration files (hbase-site.xml, core-site.xml).
        // They are passed as Path objects so that they are read from the local
        // filesystem; a plain String would be treated as a classpath resource
        // name. The directories could also be taken from the HBASE_CONF_DIR /
        // HADOOP_CONF_DIR environment variables instead of being hard-coded.
        config.addResource(new Path("/wxm/software/hbase/hbase-1.2.4/conf/hbase-site.xml"));
        config.addResource(new Path("/wxm/software/hbase/hbase-1.2.4/conf/core-site.xml"));
        createSchemaTables(config);
        insert(config);
        // modifySchema(config) is intentionally not called here: its last step
        // deletes the table that was just created and populated.
    }
}
三、代码分析
为了逻辑上严谨一点,这里写了 createOrOverwrite 方法;若是初学 API,大可不必浪费这几行空间。升级到 1.0 之后,不建议再使用 HTablePool 对象,可以使用 Admin 来跟 HBase 交互。另外,main 方法运行时需要读取 HBase 的环境配置,按理应当通过运行时参数传入配置文件路径,但简单起见,也可以直接固定在代码里。关于 Put 对象以及 HTableDescriptor 对象,可以以集合的形式传入从而提高效率,这个看需求,后期改进即可。后面如果有时间,我会将关于 HBase 的通用增删改查工具发布到 github,大家有问题可以加 qq1821088755 一起交流。
转载于:https://blog.51cto.com/10901776/1880100