python直接操作hive hdfs实例代码

from hdfs import InsecureClient

lian = InsecureClient(url='http://master:9870')  # 指定远程地址,和用户名
print(lian.list('/'))  # 列出根目录下的所有文件
lian.write(hdfs_path='/niubi/shang2', overwrite=True, data='世界你好,我来了'.encode('utf-8'))  # 在/niubi/目录下面创建名字为shang的文件,里面写入数据“世界你好,我来了”。注意,数据要使用utf-8编码才行,该方法还有一个要注意的参数,append参数用于说明要不要覆盖已有的内容,默认为False,即在尾部添加。overwrite参数指明如果文件已经存在时的操作,True表示覆盖,False时如果文件已存在就抛异常
print(lian.list('/niubi'))

#查看文件的内容
with lian.read('/niubi/shang2') as f:  # read方法返回的是上下文管理器对象,所以要使用with调用
    print(f.data.decode('utf-8'))  # 返回的数据放在read函数的data变量里面,并且存储的是utf-8的编码,所以要转码才能看到中文,当然,英文不用转码


from pyhive import hive
import thrift
import sasl
import thrift_sasl
print("hello dog")
conn = hive.Connection(host='master', port=10000, database='hive_dlp_data_test',auth='NONE')
cursor=conn.cursor()
cursor.execute('show tables')
print(cursor.fetchall())
print("hello AAA")
# for result in cursor.fetchall():
#     print("hello BBB")
#     print(result)
#     print("hello CCC")
# cursor.execute('select * from hive_dlp_data_test.sink_dlp_hive limit 6')
# print(cursor.fetchall())
cursor.execute("select * from hive_dlp_data_test.sink_dlp_hive where ts_date='20211021' and ts_hour='19' limit 6")
print(cursor.fetchall())
print("BBBB")
cursor.execute("SELECT * FROM sink_dlp_hive WHERE ts_date = '20211021' and ts_hour = '19' LIMIT 2")

print(cursor.fetchall())

print("CCC")
cursor.execute("SELECT * FROM sink_dlp_hive WHERE ts_date = '20211021' and ts_hour = '19' LIMIT 2")

print(cursor.fetchall())
print("DDD test count :")
cursor.execute("SELECT * FROM sink_dlp_hive WHERE ts_date = '20211021' and ts_hour = '19' LIMIT 2")

#!/usr/bin/python
# -*- coding: UTF-8 -*-



import pandas as pd
from pyhive import hive
import thrift
import sasl
import thrift_sasl
database_name= "hive_dlp_data_test"
table_name = "sink_dlp_hive"
conn = hive.Connection(host='master', port=10000, database=database_name, auth='NONE')
cursor = conn.cursor()
cursor.execute('show tables')
print("tables:",cursor.fetchall())
cursor.execute('show databases')
print("databases:",cursor.fetchall())

def fetch_data(database_name, table_name, ts_date, ts_hour, limit=None):
    print("database_name:",database_name)
    try:
        print("BBB")
        describe_query = 'describe extended ' + table_name
        cursor.execute(describe_query)
        print("CCCC")
        colnames = []
        for result in cursor.fetchall():
            colnames.append(result[0])
        print("DDDD",colnames)
        if limit:
            lmt_cmd = ' LIMIT ' + str(limit)
        else:
            lmt_cmd = ''
        select_query = "SELECT * FROM " + table_name + " WHERE ts_date = " + "'"+ts_date + "'"+" and ts_hour = " + "'"+ts_hour+"'" + lmt_cmd

        print("EEEE",select_query)
        print("FFFF")

        cursor.execute(select_query)
        print("#####data###get data")


        print(cursor.fetchall())
        print("GGGG")
        # df = pd.read_sql_query(select_query, conn)
        # df.columns = colnames[:df.shape[1]]


        return None
        #return df

    except Exception as e:
        # if conn:
        #     conn.close()
        print("except ERROR :",e)
        return None


for day in range(20, 22):
    for hour in range(18, 20):

        if hour < 10:
            hour_str = '0' + str(hour)
        else:
            hour_str = str(hour)
        print("AAA:",day, hour)
        fetch_data(database_name, table_name, '202110' + str(day), hour_str,limit=2)
        #df =print(day, hour, df.shape)
if conn:
    conn.close()

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import pandas as pd
from pyhive import hive
import thrift
import sasl
import thrift_sasl

database_name= "hive_dlp_data_test"
table_name = "sink_dlp_hive"
conn = hive.Connection(host='master', port=10000, database=database_name, auth='NONE')
cursor = conn.cursor()
cursor.execute('show tables')
print("tables:",cursor.fetchall())
cursor.execute('show databases')
print("databases:",cursor.fetchall())
cursor.execute('SELECT COUNT(*) FROM hive_dlp_data_test.sink_dlp_hive WHERE TS_DATE = 20211021 AND TS_HOUR = 18')
print("count test:",cursor.fetchall())

def fetch_data(database_name, table_name, ts_date, ts_hour, limit=None):
    print("databases:",database_name)
    try:
        describe_query = 'describe extended ' + table_name
        cursor.execute(describe_query)
        colnames = []
        for result in cursor.fetchall():
            colnames.append(result[0])

        if limit:
            lmt_cmd = ' LIMIT ' + str(limit)
        else:
            lmt_cmd = ''
        select_query = 'SELECT * FROM ' + table_name + ' WHERE ts_date = ' + ts_date + ' and ts_hour = ' + ts_hour + lmt_cmd
        print("select_query:",select_query)
        df = pd.read_sql_query(select_query, conn)
        print(type(df))
        df.columns = colnames[:df.shape[1]]
        return df

    except Exception as e:
        print("ts_date, ts_hour:",ts_date, ts_hour)
        print("except ERROR:",e)
        # if conn:
        #     conn.close()


for day in range(20, 22):
    for hour in range(0, 24):

        if hour < 10:
            hour_str = '0' + str(hour)
        else:
            hour_str = str(hour)
        print("date,hour:",day,hour)
        df = fetch_data(database_name, table_name, '202110' + str(day), hour_str)
        print("result:",day, hour, df.shape)

if conn:
    conn.close()

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小金子的夏天

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值