# Connect to Hive from Python, run a query, and export the results to a CSV file.
# Imports
from impala.dbapi import connect
import os
import re
import csv
import time,datetime
import pandas as pd
# Start time
# Capture the moment the script starts as a datetime object.
start = datetime.datetime.now()
# Announce the start time on stdout; the label must be delimited with plain
# ASCII quotes, since typographic quotes are not valid string delimiters.
print("开始时间:" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
# Connect to Hive
# Open a connection to the server at the given host/port using the PLAIN
# authentication mechanism.
# NOTE(review): HiveServer2 commonly listens on port 10000; confirm that
# port 1000 is really the intended value for this deployment.
conn = connect(host='196.128.50.202', port=1000, auth_mechanism='PLAIN')
# Cursor through which the query is executed and its rows are fetched.
cur = conn.cursor()
# Run the query
# Submit the query on the open cursor; rows are fetched from it afterwards.
# NOTE(review): `table` looks like a placeholder name -- confirm the real table.
cur.execute("SELECT * FROM table ")
# Row counter (starts at zero) and the list that accumulates fetched rows.
i, result = 0, []
# Fetch one row from the result set, then keep advancing the cursor in a loop
row = cur.fetchone()
while row is not None:
i = i + 1
result.append(row)
row = cur.fetchone()
if i %10000==0:
df = pd.DataFrame(result)
df.to_csv(“D://gongshan