# 1. Connect to a MySQL database
import pymysql
import pandas as pd
class Mysql_:
    """Thin wrapper around a pymysql connection with basic select/insert helpers.

    NOTE(review): credentials and host are hard-coded placeholders; move them
    into configuration before real use.
    """

    def __init__(self):
        # Connects eagerly; pymysql.Connect raises on failure, in which case
        # neither attribute is set (handled defensively in __del__).
        self.db = pymysql.Connect(user='user', password='xxxx', host='xx.xx.xx.xx',
                                  database='database', port=3306)
        self.cursor = self.db.cursor()

    def select(self, sql, data=None):
        """Run a query and return (affected_row_count, all_rows).

        Bug fix: the original ignored `data`; it is now forwarded so callers
        can use parameterized queries instead of string-built SQL.
        """
        states = self.cursor.execute(sql, data)
        res = self.cursor.fetchall()
        return states, res

    def insert_one(self, sql, data):
        """Insert one row: commit on success, roll back on failure.

        Bug fix: the original closed the cursor in `finally`, which broke any
        subsequent call on this instance and made __del__ double-close it; its
        bare `except:` also swallowed non-errors like KeyboardInterrupt.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Preserve the original best-effort contract: roll back, don't raise.
            self.db.rollback()

    def insert_many(self, sql, data):
        """Insert many rows via executemany: commit on success, roll back on failure.

        Bug fix: same cursor-close-in-finally and bare-except defects as
        insert_one; the cursor now stays open for the instance's lifetime.
        """
        try:
            self.cursor.executemany(sql, data)
            self.db.commit()
        except Exception:
            self.db.rollback()

    def __del__(self):
        # Best-effort cleanup: attributes may be missing if __init__ failed,
        # and interpreter shutdown order makes __del__ unreliable anyway.
        try:
            self.cursor.close()
            self.db.close()
        except Exception:
            pass
if __name__ == '__main__':
    # Load a CSV and bulk-insert it through the pyformat placeholders below.
    df = pd.read_csv('xxx.csv')
    df.fillna(0, inplace=True)
    mysql_ = Mysql_()
    sql = "insert into tablename values(%(filed_name1)s,%(filed_name2)s)"
    field_names = ['filed_name1', 'filed_name2']
    # NOTE(review): [:1] inserts only the first row — presumably a smoke test;
    # drop the slice to load the whole file. row[1:] skips the first column
    # (looks like the CSV index — confirm against the file's layout).
    items = []
    for row in df.values[:1]:
        # Bug fix: the original rebound the name `data` (the DataFrame) inside
        # the loop, and used `type(i) != str` instead of isinstance().
        values = [cell if isinstance(cell, str) else int(cell) for cell in row[1:]]
        items.append(dict(zip(field_names, values)))
    mysql_.insert_many(sql, items)
# 2. Connect to a Hive/Impala warehouse two ways: with the impyla module and with the ibis module
from impala.dbapi import connect
from krbcontext import krbcontext
from impala.util import as_pandas
import ibis
class Config:
    """Static connection settings for the Kerberos-secured Hive/Impala cluster.

    Shared as a class attribute so every consumer sees one mapping; values
    here are placeholders to be filled in per environment.
    """

    config = dict(
        kerberos_principal="kerberos_user",
        keytab_file="/home/user/user.keytab",
        host="xxxxx",
        port=10000,
        AUTH_MECHANISM="GSSAPI",
        webhdfs_host1="xxxxxx",
        webhdfs_port=9870,
    )
class Hive_connect:
    """Kerberos-authenticated Hive/Impala access through impyla's DB-API.

    NOTE(review): insert_one/insert_many close the connection in `finally`,
    so each instance supports at most one insert call — kept as-is because
    callers may rely on it, but documented.
    """

    def __init__(self, config, database, service_name='hive'):
        # krbcontext obtains a Kerberos ticket from the keytab so the GSSAPI
        # handshake inside connect() can succeed.
        with krbcontext(using_keytab=True,
                        principal=config.config['kerberos_principal'],
                        keytab_file=config.config['keytab_file']):
            self.conn = connect(host=config.config['host'],
                                port=config.config['port'],
                                database=database,
                                auth_mechanism=config.config['AUTH_MECHANISM'],
                                kerberos_service_name=service_name)
        self.cur = self.conn.cursor()

    def select(self, sql):
        """Execute a query and return the full result set as a pandas DataFrame."""
        self.cur.execute(sql)
        df = as_pandas(self.cur)
        return df

    def insert_one(self, sql):
        """Execute a single insert; returns the cursor status, or 'INSERT_FAIL'.

        Bug fix: the original bare `except:` swallowed every exception type;
        it now catches Exception only. The connection is closed afterwards
        (one-shot design — the instance cannot be reused).
        """
        state = 'INSERT_FAIL'
        try:
            self.cur.execute(sql)
            self.conn.commit()
            state = self.cur.status()
        except Exception:
            self.conn.rollback()
        finally:
            self.conn_close()
        return state

    def insert_many(self, sql, data):
        """Execute a parameterized bulk insert; see insert_one for semantics.

        Bug fix: bare `except:` narrowed to `except Exception`.
        """
        state = 'INSERT_FAIL'
        try:
            self.cur.executemany(operation=sql, seq_of_parameters=data)
            self.conn.commit()
            state = self.cur.status()
        except Exception:
            self.conn.rollback()
        finally:
            self.conn_close()
        return state

    def delete(self):
        # Deliberate guard: deletes are forbidden through this wrapper.
        raise AttributeError('不允许执行删除操作')

    def conn_close(self):
        """Close the cursor and the underlying connection."""
        self.cur.close()
        self.conn.close()
class Hive_connect_ibis:
    """Kerberos-authenticated Impala/Hive access through ibis (with WebHDFS)."""

    def __init__(self, config, database, service_name='hive'):
        # Acquire the Kerberos ticket first, then build the HDFS client and
        # the Impala client that uses it.
        with krbcontext(using_keytab=True,
                        principal=config.config['kerberos_principal'],
                        keytab_file=config.config['keytab_file']):
            self.hdfs = ibis.impala.hdfs_connect(host=config.config['webhdfs_host1'],
                                                 port=config.config['webhdfs_port'],
                                                 auth_mechanism=config.config['AUTH_MECHANISM'],
                                                 use_https=False, verify=False)
            self.client = ibis.impala.connect(host=config.config['host'],
                                              port=config.config['port'],
                                              database=database,
                                              hdfs_client=self.hdfs,
                                              auth_mechanism=config.config['AUTH_MECHANISM'],
                                              kerberos_service_name=service_name)

    def select(self, sql):
        """Run a SQL query, return all rows as a DataFrame, then close the client.

        Bug fix: the original called execute(limit='default'/None), which is a
        TypeError at runtime (a string divided by None). limit=None fetches
        all rows without a row cap.
        """
        expr = self.client.sql(sql)
        df = expr.execute(limit=None)
        self.conn_close()
        return df

    def insert(self):
        # Bug fix: an unconditional `pass` made the original raise unreachable,
        # and its message wrongly referred to deletion; inserts are forbidden
        # through this wrapper.
        raise AttributeError('不允许执行插入操作')

    def conn_close(self):
        """Close the ibis Impala client."""
        self.client.close()
# Module-level default Config instance (created on import; kept for any importers).
config = Config()
if __name__ == '__main__':
    config = Config()
    hive_ = Hive_connect_ibis(config=config, database='database')
    # Bug fix: Hive_connect_ibis has no `execute` method — `select` is the
    # query entry point; it returns a DataFrame and closes the client.
    df = hive_.select('select * from d_cust limit 10')
    print(df)