HBase 常用功能工具类封装

# -*- coding: utf-8 -*-

import happybase
import datetime
import sys
sys.path.append('../')
from conf.config import hbase_config
from utlogging import LoggerWrite


class HBaseTools(object):
    """Convenience wrapper around a HappyBase connection pool.

    Each method borrows one connection from the pool for the duration of the
    call and reports through the logger object given to the constructor
    (only its ``info`` method is used).
    """

    # Columns fetched by get_row() when the caller does not choose any.
    DEFAULT_ROW_COLUMNS = ('domain_info:domain', 'domain_info:pv')

    def __init__(self, log, host=None, pool_size=3):
        """Create the connection pool.

        :param log: logger-like object exposing ``info(message)``.
        :param host: host passed to the connection pool; when omitted the
            value of ``hbase_config["host"]`` is used.
        :param pool_size: number of connections kept in the pool.
        """
        self.host = hbase_config["host"] if host is None else host
        self.pool = happybase.ConnectionPool(size=pool_size, host=self.host)
        self.log = log

    def create_table(self, name_space, table_name, column_family,
                     max_versions=30):
        """Create the table ``<name_space>:<table_name>`` with one column family.

        The namespace has to exist before the table is created (HBase shell:
        ``create_namespace 'ypl'``); otherwise HBase reports
        ``org.apache.hadoop.hbase.NamespaceNotFoundException``.

        :param name_space: namespace the table belongs to.
        :param table_name: table name without the namespace prefix.
        :param column_family: name of the single column family to create.
        :param max_versions: number of cell versions the family retains.
        """
        qualified_name = '%s:%s' % (name_space, table_name)
        with self.pool.connection() as connection:
            self.log.info("查看当前库中的表")
            self.log.info(connection.tables())
            # in_memory is a boolean option. The string 'false' is non-empty
            # and therefore truthy, so it risks being sent as "true"; pass a
            # real bool instead.
            family_options = dict(max_versions=max_versions, in_memory=False)
            connection.create_table(qualified_name,
                                    {column_family: family_options})
            self.log.info("建表成功!")

    def show_tables(self):
        """Log the tables visible through the connection and return them.

        :return: whatever ``connection.tables()`` returns.
        """
        with self.pool.connection() as connection:
            self.log.info("查看当前库中的表")
            tables = connection.tables()
            self.log.info(tables)
        return tables

    def delete_table(self, table_name):
        """Disable and then delete a table.

        :param table_name: full table name, including the namespace prefix
            when the table lives in one.
        """
        with self.pool.connection() as connection:
            self.log.info("先禁用表!")
            connection.disable_table(table_name)
            # The table was disabled explicitly above, so the delete call is
            # told not to disable it again.
            connection.delete_table(table_name, disable=False)
            self.log.info("表 %s 已删除!! " % table_name)

    def delete_row(self, tname, row_key):
        """Delete one whole row.

        :param tname: table name.
        :param row_key: key of the row to remove.
        """
        with self.pool.connection() as connection:
            table = connection.table(tname)
            # columns=None removes the entire row. To delete only selected
            # columns pass them explicitly, e.g.
            # table.delete('www.test2.com', columns=['cf1:name', 'cf1:price'])
            table.delete(row_key, columns=None, timestamp=None, wal=True)
        self.log.info("rowkey =>  %s 所在行已删除! " % row_key)

    def get_row(self, tname, row, columns=DEFAULT_ROW_COLUMNS):
        """Fetch the latest stored values of one row, log them and return them.

        :param tname: table name.
        :param row: row key, e.g. ``'7190d16768419ad594e7c36207dd38b9'``.
        :param columns: columns to fetch; defaults to
            ``DEFAULT_ROW_COLUMNS``. ``None`` is forwarded unchanged to
            ``table.row``.
        :return: the value returned by ``table.row``, e.g.
            ``{'domain_info:pv': '30',
            'domain_info:domain': 'shanghaidisneyresort.com'}``.
        """
        if columns is not None:
            # Hand the client a list, as the original hard-coded call did.
            columns = list(columns)
        with self.pool.connection() as connection:
            table = connection.table(tname)
            info = table.row(row=row, columns=columns, timestamp=None,
                             include_timestamp=False)
            self.log.info(info)
        return info

    def get_rows(self, tname, rows):
        """Fetch the latest stored values of several rows, log and return them.

        :param tname: table name.
        :param rows: row keys to fetch.
        :return: the value returned by ``table.rows`` for those keys.
        """
        with self.pool.connection() as connection:
            table = connection.table(tname)
            info = table.rows(rows=rows, columns=None, timestamp=None,
                              include_timestamp=False)
            self.log.info(info)
        return info

    def put_row(self, tname, domain_data, row_key):
        """Insert or overwrite a single row.

        :param tname: table name.
        :param domain_data: mapping of ``'family:qualifier'`` to value.
        :param row_key: key of the row to write.
        """
        with self.pool.connection() as connection:
            table = connection.table(tname)
            table.put(row=row_key, data=domain_data)

    def get_7days_data(self, tname, row_key, column, versions=7):
        """Fetch stored versions of one cell, log them and return them.

        The lookup is bounded by a version count, not by a time window: it
        asks ``table.cells`` for ``versions`` versions, so it covers seven
        days only if one version is written per day
        (NOTE(review): confirm the write cadence).

        :param tname: table name.
        :param row_key: row key.
        :param column: column as ``'family:qualifier'``.
        :param versions: number of versions requested.
        :return: the value returned by ``table.cells``.
        """
        with self.pool.connection() as connection:
            table = connection.table(tname)
            content = table.cells(row_key, column, versions, timestamp=None,
                                  include_timestamp=False)
        self.log.info(content)
        return content

    def batch_rows(self, tname, lines, batch_size=1024):
        """Bulk-insert tab-separated records.

        Each non-blank line must hold at least four tab-separated fields:
        row key (domain md5), domain, pv and accounting day. The trailing
        line terminator is removed before splitting so it does not end up
        inside the last stored value; blank lines are skipped.

        :param tname: table name.
        :param lines: iterable of text lines, e.g. from ``readlines()``.
        :param batch_size: value passed as ``batch_size`` to ``table.batch``.
        :return: number of rows handed to the batch.
        :raises IndexError: if a non-blank line has fewer than four fields.
        """
        self.log.info("开始批量插入数据...........")
        queued = 0
        with self.pool.connection() as connection:
            table = connection.table(tname)
            with table.batch(batch_size=batch_size) as bat:
                for line in lines:
                    if not line.strip():
                        continue
                    fields = line.rstrip('\r\n').split('\t')
                    domain_data = {
                        'domain_info:domain': fields[1],
                        'domain_info:pv': fields[2],
                        'domain_info:acct_day': fields[3],
                    }
                    bat.put(row=fields[0], data=domain_data)
                    queued += 1
        return queued


if __name__ == '__main__':
    # Manual smoke test: exactly one operation is active at a time, the other
    # usage examples are kept below as comments.
    name_space = 'ypl'
    table_name = 'fly_back_domain'
    column_family = 'domain_info'
    # Fully qualified table name: '<namespace>:<table>'.
    tname = ':'.join((name_space, table_name))

    log = LoggerWrite("Hbase_Test")
    hbt = HBaseTools(log)

    # --- Create the table ---------------------------------------------------
    # hbt.create_table(name_space, table_name, column_family)

    # --- Delete the table (currently active) --------------------------------
    hbt.delete_table(tname)

    # --- Insert a single row ------------------------------------------------
    # domain_data = {'domain_info:r_time': '2019-02-06 15:19:20',
    #                'domain_info:domain': 'shanghaidisneyresort.com',
    #                'domain_info:pv': '30'}
    # row_key = '7190d16768419ad594e7c36207dd38b9'
    # hbt.put_row(tname, domain_data, row_key)

    # --- Fetch a single row -------------------------------------------------
    # row = '1f429d84341a594c7b0fcfc422e0753c'
    # hbt.get_row(tname, row)

    # --- Fetch several rows -------------------------------------------------
    # row = ['dfe1e3bdcaef60e409089f970eff38e1',
    #        '1f429d84341a594c7b0fcfc422e0753c']
    # hbt.get_rows(tname, row)

    # --- Delete a single row ------------------------------------------------
    # row = '7190d16768419ad594e7c36207dd38b9'
    # hbt.delete_row(tname, row)

    # --- Query several versions of one cell ---------------------------------
    # column = 'domain_info:domain'
    # row = '1f429d84341a594c7b0fcfc422e0753c'
    # hbt.get_7days_data(tname, row, column)

    # --- Bulk insert from a tab-separated file, timing the run --------------
    # starttime = datetime.datetime.now()
    # with open('/dist/Flyback_Data2Hbase/hbase_test.csv') as wf:
    #     lines = wf.readlines()
    #     hbt.batch_rows(tname, lines)
    #
    # endtime = datetime.datetime.now()
    # print((endtime - starttime).seconds)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值