HBase Basic Commands and Practice

1. Checking HBase status

status
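
The status command also accepts a detail level argument; for example:

# per-server summary
status 'simple'
# full detail, down to individual regions
status 'detailed'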

2. Create, delete, update, and query

Create a table:

# create '<table name>','<column family>','<column family>'

create 'table','cf1','cf2'

Delete a table: the table must first be set to disabled, and only then can it be dropped.

# disable '<table name>'
disable 'table'
# drop '<table name>'
drop 'table'
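
Before dropping, it can be useful to confirm which tables exist and that the target really is disabled; the shell provides commands for that:

# list all tables / check whether a table exists
list
exists 'table'
# confirm the table is disabled before dropping it
is_disabled 'table'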

Add or modify a column family:

If the column family does not exist it is added; if it already exists, its settings are modified.
# alter '<table name>',{<column family attributes>}
alter 'table',{NAME=>'cf3',VERSIONS=>3,IN_MEMORY=>TRUE}

Delete a column family:

# alter '<table name>',{NAME=>'<column family>',METHOD=>'delete'}
alter 'table',{NAME=>'cf3',METHOD=>'delete'}

Write data:

# put '<table name>','<rowkey>','<column family:column>','<value>'
put 'table','1001','cf1:column1','value1'

Query data:

# Full-table scan; use with caution, and remember to add constraints when the table is large
# (see the limited-scan example below)
# scan '<table name>'
scan 'table'
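
A sketch of constraining a scan with a row limit or a rowkey range (the values here are only illustrative):

# scan at most 10 rows
scan 'table',{LIMIT=>10}
# scan a rowkey range (STOPROW is exclusive)
scan 'table',{STARTROW=>'1001',STOPROW=>'1003'}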

# Read a single record by rowkey
# get '<table name>','<rowkey>'
get 'table','1001'

# Query by timestamp
# get '<table name>','<rowkey>',{COLUMN=>'<column family:column>',TIMESTAMP=><timestamp>}
put 'table','1002','cf1:column2','value1'
put 'table','1002','cf1:column2','value2'
get 'table','1002',{COLUMN=>'cf1:column2',TIMESTAMP=>1548778946757}

# Query by number of versions
# get '<table name>','<rowkey>',{COLUMN=>'<column family:column>',VERSIONS=><number of versions>}
get 'table','1002',{COLUMN=>'cf1:column2',VERSIONS=>2}

# Query with a filter, e.g. a value filter
# scan '<table name>',FILTER=>"<filter name>(=,'<comparator>:<value>')"
scan 'table',FILTER=>"ValueFilter(=,'binary:value1')" # exact match
scan 'table',FILTER=>"ValueFilter(=,'substring:lue')" # substring match; this mode seems a bit unreliable for me, some rows were not matched and I am not sure why
scan 'table',FILTER=>"ColumnPrefixFilter('column2') AND ValueFilter(=,'binary:value1')" # combining multiple conditions

Delete data:

# truncate '<table name>'
truncate 'table'
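
truncate wipes the whole table (it disables, drops and re-creates it). To remove a single cell or row instead, the shell also provides delete and deleteall:

# delete one cell
delete 'table','1001','cf1:column1'
# delete an entire row
deleteall 'table','1001'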

Note:

  • After a successful write, the data is visible from the HBase shell, but it cannot be seen on HDFS yet; it is still in memory and has not been written out to HDFS.
  • flush '<table name>' writes the in-memory data out to HDFS, as in the example below.
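
For example, flushing the table written above:

# force this table's in-memory data to be written out to HDFS
flush 'table'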

3. Understanding the storage path

After the table is created, it can be seen on HDFS (a sketch of the layout follows the list):

  • HBase's configuration file (hbase-site.xml) sets HBase's root directory on HDFS (hbase.rootdir); created tables are stored under the data directory beneath that root.
  • The directory under the table is a region; "cab86291ffcc356777bbe8b39a28bb5a" is its encoded region name, and the web monitoring page shows which machine this region is hosted on.
  • No namespace was created here, so the table lives under the predefined default namespace.
  • Each column family also uses its own directory to store data, corresponding to cf1 and cf2 here.
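
A sketch of what this looks like, assuming hbase.rootdir is /hbase (substitute your own root directory and region name):

# <rootdir>/data/<namespace>/<table>/<encoded region name>/<column family>
hadoop fs -ls /hbase/data/default/table
hadoop fs -ls /hbase/data/default/table/cab86291ffcc356777bbe8b39a28bb5a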

4. Table attributes

desc 'table'

  • NAME: the name of the column family

  • BLOOMFILTER: bloom filter, with row-level granularity; for details see: https://www.cnblogs.com/cssdongl/p/6396630.html

  • VERSIONS: defaults to 1, meaning one version of each value is kept

  • IN_MEMORY:

  • KEEP_DELETED_CELLS:

  • DATA_BLOCK_ENCODING:

  • TTL: defaults to 2147483647 seconds (roughly 68 years); the time-to-live of the column family's data, in seconds

  • COMPRESSION: sets the compression algorithm (see the alter example after this list)

  • MIN_VERSIONS: the minimum number of versions to keep

  • BLOCKCACHE: block cache setting; if the data is mostly accessed sequentially or rarely accessed at all, the column family's cache can be turned off; it is on by default

  • BLOCKSIZE: the HFile block size (default 64 KB)

  • REPLICATION_SCOPE:
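
A sketch of adjusting a few of these attributes with alter (the values are only illustrative, and SNAPPY requires the Snappy native libraries to be installed):

# keep cf1 data for one day, compress it with Snappy, and turn off its block cache
alter 'table',{NAME=>'cf1',TTL=>86400,COMPRESSION=>'SNAPPY',BLOCKCACHE=>'false'}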

5. Operating HBase from Python

For installing Thrift, see the official documentation: http://thrift.apache.org/docs/install/centos

  • Go into HBase's bin directory and start the Thrift service

      cd /usr/local/src/hbase-0.98.6-hadoop2/bin
      ./hbase-daemon.sh start thrift
      jps
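      # 25310 below is the ThriftServer PID taken from the jps output; by default the Thrift service itself listens on port 9090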
      netstat -antup | grep 25310
    

  • Copy Thrift's Python module into the directory where the code will live

      cd /usr/local/src/thrift-0.8.0/lib/py/build/lib.linux-x86_64-2.7
    
      mkdir /1_hbaseTest
      cp -r thrift/ /1_hbaseTest
    
  • Download and unpack the HBase source package

      wget http://archive.apache.org/dist/hbase/hbase-0.98.6/hbase-0.98.6-src.tar.gz
      tar -zxvf hbase-0.98.6-src.tar.gz
    
  • Generate a module that Python can load from the Thrift IDL file

      cd /1_hbaseTest/hbase-0.98.6/hbase-thrift/src/main/resources/org/apache/hadoop/hbase/thrift
      thrift --gen py Hbase.thrift
    

  • Copy the generated module into the code directory

      cd gen-py/
      cp -r hbase/ /1_hbaseTest/
    
  • Code (create a table):

      from thrift import Thrift
      from thrift.transport import TSocket
      from thrift.transport import TTransport
      from thrift.protocol import TBinaryProtocol
    
      from hbase import Hbase
      from hbase.ttypes import *
    
      transport = TSocket.TSocket('master',9090)
      transport = TTransport.TBufferedTransport(transport)
    
      protocol = TBinaryProtocol.TBinaryProtocol(transport)
    
      client = Hbase.Client(protocol)
    
      transport.open()
    
    
      #=============================================
    
      base_info_contents = ColumnDescriptor(name='meta-data',maxVersions=1)
      other_info_contents = ColumnDescriptor(name='flags',maxVersions=1)
    
      client.createTable('new_table',[base_info_contents,other_info_contents])
    
      print client.getTableNames()
    

  • Code (insert data):

      from thrift import Thrift
      from thrift.transport import TSocket
      from thrift.transport import TTransport
      from thrift.protocol import TBinaryProtocol
    
      from hbase import Hbase
      from hbase.ttypes import *
    
      transport = TSocket.TSocket('master', 9090)
      transport = TTransport.TBufferedTransport(transport)
    
      protocol = TBinaryProtocol.TBinaryProtocol(transport)
    
      client = Hbase.Client(protocol)
    
      transport.open()
    
      tableName = 'new_table'
      rowkey = '1001'
    
      mutations = [Mutation(column="meta-data:name",value="python"), \
      				Mutation(column="meta-data:tag",value="pop"), \
      				Mutation(column="flags:is_valid",value="TRUE")]
    
      client.mutateRow(tableName,rowkey,mutations,None)
    

  • Code (read data):

      from thrift import Thrift
      from thrift.transport import TSocket
      from thrift.transport import TTransport
      from thrift.protocol import TBinaryProtocol
    
      from hbase import Hbase
      from hbase.ttypes import *
    
      transport = TSocket.TSocket('master', 9090)
      transport = TTransport.TBufferedTransport(transport)
    
      protocol = TBinaryProtocol.TBinaryProtocol(transport)
    
      client = Hbase.Client(protocol)
    
      transport.open()
    
      table_name = "new_table"
      rowkey = '1001'
    
      result = client.getRow(table_name,rowkey,None)
    
      for r in result:
      		print "the row is ",r.row
      		print "the name is ",r.columns.get("meta-data:name").value
      		print "the flag is ",r.column.get("flags:is_valid").value
    

  • Code (scan data):

      from thrift import Thrift
      from thrift.transport import TSocket
      from thrift.transport import TTransport
      from thrift.protocol import TBinaryProtocol
    
      from hbase import Hbase
      from hbase.ttypes import *
    
      transport = TSocket.TSocket('master', 9090)
      transport = TTransport.TBufferedTransport(transport)
    
      protocol = TBinaryProtocol.TBinaryProtocol(transport)
    
      client = Hbase.Client(protocol)
    
      transport.open()
    
      table_name = "new_table"
    
      scan = TScan()
    
      id = client.scannerOpenWithScan(table_name,scan,None)
      result = client.scannerGetList(id,10)
    
      for r in result:
      		print '====='
      		print 'the row is ' , r.row
    
      		for k,v in r.columns.items():
      				print "\t".join([k,v.value])
    

6. Operating HBase from MapReduce

mkdir mr_hbase
cd mr_hbase/

Prepare the input data (tab-separated key/value lines, matching the split done in map.py).

run.sh:

HADOOP_CMD="/usr/local/src/hadoop-2.6.5/bin/hadoop"
STREAM_JAR_PATH="/usr/local/src/hadoop-2.6.5/share/hadoop/tools/lib/hadoop-streaming-2.6.5.jar"

INPUT_FILE_PATH="/input.data"
OUTPUT_PATH="/output_hbase"

$HADOOP_CMD fs -rmr -skipTrash $OUTPUT_PATH

$HADOOP_CMD jar $STREAM_JAR_PATH \
		-input $INPUT_FILE_PATH \
		-output $OUTPUT_PATH \
		-mapper "python map.py mapper_func" \
		-file ./map.py \
		-file "./hbase.tgz" \
		-file "./thrift.tgz"

map.py:

#!/usr/bin/python

import os
import sys

os.system('tar zxvf hbase.tgz > /dev/null')
os.system('tar zxvf thrift.tgz > /dev/null')
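# hbase.tgz and thrift.tgz are shipped with the job via -file in run.sh; they are unpacked above into the task's working directory, and sys.path.append("./") below makes the modules importable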

reload(sys)
sys.setdefaultencoding('utf-8')

sys.path.append("./")

from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol

from hbase import Hbase
from hbase.ttypes import *

transport = TSocket.TSocket('master', 9090)
transport = TTransport.TBufferedTransport(transport)

protocol = TBinaryProtocol.TBinaryProtocol(transport)

client = Hbase.Client(protocol)

transport.open()

tableName = "new_table"

def mapper_func():
		for line in sys.stdin:
				ss = line.strip().split('\t')
				if len(ss) != 2:
						continue
				key = ss[0].strip()
				val = ss[1].strip()

				rowkey = key

				mutations = [Mutation(column="meta-data:name",value=val), \
								Mutation(column="flags:is_valid",value="TRUE")]

				client.mutateRow(tableName,rowkey,mutations,None)


if __name__ == "__main__":
		module = sys.modules[__name__]
		func = getattr(module, sys.argv[1])
		args= None
		if len(sys.argv) > 1:
				args = sys.argv[2:]

		func(*args)

7. Operating HBase from Java

# Preparation
create 'table_java','cf1'
put 'table_java','1001','cf1:name','java1'
put 'table_java','1002','cf1:name','java2'
put 'table_java','1001','cf1:age','25'
put 'table_java','1002','cf1:gender','man'
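
To double-check the prepared rows before running the Java code, a quick scan works:

scan 'table_java'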

public class PutOne {
	public static final String tableName = "table_java";
	public static final String columnFamily = "cf1";

	public static Configuration conf = HBaseConfiguration.create();
	private static HTable table;

	public static void main(String[] argv){
		conf.set("habse.master","192.168.119.10");
		conf.set("hbase.zookeeper.quorum","192.168.119.10,192.168.119.11,192.168.119.12");

		add();


	}

	private static void add() {
		addOneRecord(tableName, "ip=192.168.87.200-001", columnFamily, "ip", "192.168.87.101");
		addOneRecord(tableName, "ip=192.168.87.200-001", columnFamily, "userid", "1100");
		addOneRecord(tableName, "ip=192.168.87.200-002", columnFamily, "ip", "192.168.1.201");
		addOneRecord(tableName, "ip=192.168.87.200-002", columnFamily, "userid", "1200");
		addOneRecord(tableName, "ip=192.168.87.200-003", columnFamily, "ip", "192.168.3.201");
		addOneRecord(tableName, "ip=192.168.87.200-003", columnFamily, "userid", "1300");
	}

	private static void addOneRecord(String tableName, String rowKey, String columnFamily, String qualifier, String value) {
		try {
			table = new HTable(conf,tableName);
			Put put = new Put(Bytes.toBytes(rowKey));
			put.add(Bytes.toBytes(columnFamily),Bytes.toBytes(qualifier),Bytes.toBytes(value));
			table.put(put);
			System.out.print("insert record " + rowKey + " to table "+ tableName + "success");
		} catch (IOException e) {
			e.printStackTrace();
		}

	}
}

Build the project into a jar with Maven in IDEA, upload it to the server, and run it.
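
For reference, the run step might look like the following; this is an assumption based on the classpath fix in problem (2) below, and the exact command depends on how the jar is packaged:

hadoop jar hbaseTest-1.0-SNAPSHOT.jar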

Problems you may run into:

(1) hbaseTest-1.0-SNAPSHOT.jar has no main manifest attribute

Fix: add the main class to the MANIFEST.MF file under the jar's META-INF directory; for details see: https://jingyan.baidu.com/article/db55b60990f6084ba30a2fb8.html

(2)NoClassDefFoundError: org/apache/hadoop/hbase/HBaseConfiguration

Fix: in the hadoop-env.sh file under the Hadoop installation directory, add: export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/usr/local/src/hbase-0.98.6-hadoop2/lib/*

where /usr/local/src/hbase-0.98.6-hadoop2 is your own HBase installation path.

(3) The web UI showed the table in a disabled state.

The cause: I had set this table to disabled myself... entirely self-inflicted.

Fix: enable 'table_java'

Reposted from: https://my.oschina.net/u/3746234/blog/3007740
