# -*- encoding: utf-8 -*-
import pandas as pd
from sqlalchemy import create_engine
from cassandra.cluster import Cluster
from cassandra.query import dict_factory
from pykafka import KafkaClient
import uuid,json
import queue
# Console display tuning: show every row and widen columns when printing DataFrames.
pd.set_option('display.max_rows', None)
pd.set_option('max_colwidth', 100)
__all__ = ['sqlhelper']  # the only public name exported by this module
__version__ = 1.2 # type: float
# The note below (kept verbatim) says: "currently supports mysql, Cassandra,
# kafka; usage examples are below" — see the __main__ guard at the bottom.
'''
目前支持mysql,Cassandra,kafka
在下面有实例
'''
class sqlhelper:
    """Helper for moving tabular data between MySQL, Cassandra, Kafka and
    local files (excel/csv) via pandas.

    Supported ``dms`` values: ``'mysql'``, ``'cassandra'``, ``'kafka'``.
    Usage examples live in the ``__main__`` guard at the bottom of this file.
    """
    writenum = 0  # count of excel-to-mysql writes (shadowed per instance on first write)

    def __init__(self, dms, user_name=None, password=None, ip=None, databases=None,
                 topic=None):
        """Open a connection for the chosen data-management system.

        dms       -- 'mysql', 'cassandra' or 'kafka'
        user_name -- MySQL user name (mysql only)
        password  -- MySQL password (mysql only)
        ip        -- server address; for kafka a 'host:port' broker string
        databases -- MySQL schema name (mysql only)
        topic     -- kafka topic name (kafka only)
        """
        self.dms = dms
        if self.dms == "mysql":
            # SQLAlchemy engine through pymysql; port 3306 is assumed.
            self.yinqingword = create_engine(
                'mysql+pymysql://' + user_name + ':' + password + '@' + ip + ':3306/' + databases)
        elif self.dms == 'cassandra':
            self.cluster = Cluster([ip], connect_timeout=10)
            self.session = self.cluster.connect()
            # Return rows as dicts so they can be fed to pandas directly.
            self.session.row_factory = dict_factory
        elif self.dms == 'kafka' and ip:
            self.kafkaclient = KafkaClient(hosts=ip)
            self.topic = self.kafkaclient.topics[topic.encode("utf8")]
            # Sync producer with delivery reports so kafkasent can retry failures.
            self.producer = self.topic.get_sync_producer(delivery_reports=True)

    def readmysql(self, sql):
        """Run a SQL query against MySQL and return the result as a DataFrame."""
        self.df = pd.read_sql_query(sql, self.yinqingword)
        return self.df

    def writemysql(self, aim_doc, header, sheet_name, excel_sheet='Sheet1'):
        """Append rows from an excel file into a MySQL table.

        aim_doc     -- path of the excel file (remember to escape backslashes)
        header      -- 0-based row index where the excel header starts
        sheet_name  -- destination table name in MySQL
        excel_sheet -- worksheet to read; default 'Sheet1' keeps the old
                       hard-coded behaviour backward compatible
        """
        self.writenum += 1
        self.wenjian = pd.read_excel(aim_doc, sheet_name=excel_sheet, header=header)
        self.wenjian.to_sql(sheet_name, self.yinqingword, index=False, if_exists='append')
        print('have writed %d' % self.writenum)

    def __iter__(self):
        # NOTE(review): no __next__ is defined, so iterating an instance will
        # raise TypeError on the first next() call; kept only for
        # backward compatibility — confirm whether any caller relies on it.
        return self

    def saveexcel(self, sql, path):
        """Export data to an excel file.

        sql  -- a SQL string (executed via readmysql) or an object with
                a ``to_excel`` method (e.g. a DataFrame)
        path -- destination .xlsx path
        Returns the exported DataFrame.
        """
        if isinstance(sql, str):
            df = self.readmysql(sql=sql)
            df.to_excel(index=False, excel_writer=path)
            return df
        # Already a DataFrame: write it out unchanged and hand it back.
        sql.to_excel(index=False, excel_writer=path)
        return sql

    def savecsv(self, sql, path):
        """Export data to a csv file (same contract as saveexcel)."""
        if isinstance(sql, str):
            df = self.readmysql(sql=sql)
            df.to_csv(index=False, path_or_buf=path)
            return df
        sql.to_csv(index=False, path_or_buf=path)
        return sql

    def readcasssandra(self, sql, databases=None):
        """Run a CQL query and return the rows as a DataFrame.

        sql       -- CQL select statement
        databases -- optional keyspace to USE before executing

        Map-typed columns (e.g. map<varchar, varchar>) come back from the
        driver as OrderedMapSerializedKey objects; they are converted to
        plain dicts so pandas and json can serialise them.
        """
        if databases:
            self.session.execute("use %s;" % databases)
        res = self.session.execute(sql, timeout=10)
        data = res.all()
        # Detect map-typed columns by type name instead of the old exact
        # string compare, which only matched one driver/reactor combination.
        need_fix = [name for name, ctype in zip(res.column_names or [],
                                                res.column_types or [])
                    if 'MapType' in str(ctype)]
        if need_fix:
            for row in data:
                for col in need_fix:
                    if row[col] is not None:
                        # dict() is lossless; the old str()/replace/json.loads
                        # round-trip broke on quotes inside map values.
                        row[col] = dict(row[col])
        return pd.DataFrame(data)

    def execassandra(self, sql, databases=None):
        """Execute a CQL statement (insert/update/ddl). Returns True on success."""
        if databases:
            self.session.execute("use %s;" % databases)
        self.session.execute(sql, timeout=10)
        return True

    def kafkasent(self, msg):
        """Send one message to the topic, re-producing any failed deliveries."""
        self.producer.produce(msg.encode("utf8"),
                              partition_key=str(uuid.uuid1()).encode('utf8'))
        while True:
            try:
                old_msg, exc = self.producer.get_delivery_report(block=False)
                if exc is not None:
                    # Delivery failed: re-queue the same payload and key.
                    self.producer.produce(old_msg.value, partition_key=old_msg.partition_key)
            except queue.Empty:
                # No more pending reports: everything delivered.
                print('完成')
                break

    def kafkaclose(self):
        """Stop the kafka producer (flushes pending messages)."""
        self.producer.stop()

    def kafkaget(self, consumer_group='18'):
        """Consume and print all currently available messages from the topic.

        consumer_group -- kafka consumer group id; default '18' keeps the old
                          hard-coded behaviour backward compatible
        """
        consumer = self.topic.get_simple_consumer(
            consumer_group=consumer_group,
            auto_commit_enable=True,
            auto_commit_interval_ms=1,
            consumer_timeout_ms=10000,
        )
        for message in consumer:
            # Print each message exactly once (the original printed twice).
            if message is not None:
                print(message.offset, message.value)
        print("do you go out")
        consumer.stop()
if __name__ == '__main__':
    # The triple-quoted strings below are commented-out usage examples
    # (MySQL export, Cassandra read/write, Kafka produce/consume).
    # Uncomment one at a time and point it at a live service to try it.
    '''
    #使用MySQL
    mysqlhandle=sqlhelper('mysql','root','123456','127.0.0.1','ts_use')
    res=mysqlhandle.saveexcel("select * from job_day",'try.xlsx')
    mysqlhandle.saveexcel(res,path='try.xlsx')
    print(res)
    '''
    '''
    #使用cassandra
    mysqlhandle1=sqlhelper('cassandra',ip='192.168.0.99')
    # res=mysqlhandle1.execassandra(sql="INSERT INTO test(attr_type , attr , metric_id , time , tags , value ) VALUES ( 'c','int','9','2020-04-15 14:12:10.00000',{'ip':'127.1.1.1'},9.9);",databases='ctest')
    res=mysqlhandle1.readcasssandra('select * from test;',databases='ctest')
    mysqlhandle1.saveexcel(res,'try.xlsx')
    print(res)
    '''
    '''
    #往Kafka中发消息
    mysqlhandle2=sqlhelper('kafka',ip='192.168.1.99:9092',topic='tests')
    mysqlhandle2.kafkasent(msg='111111')
    mysqlhandle2.kafkaclose()
    print('finish?')
    '''
    '''
    #从Kafka中消费消息
    mysqlhandle2 = sqlhelper('kafka', ip='192.168.1.99:9092', topic='tests')
    mysqlhandle2.kafkaget()
    mysqlhandle2.kafkaclose()
    print("get all done?")
    '''
# --- trailing blog-page metadata from the original source, commented out so
# --- the file parses as Python; it is not part of the program.
# MySQL / Cassandra / Kafka upload & export python script - 1.2
# Latest recommended article published 2023-11-27 18:31:15.
# This article describes how to write Python scripts to import and export data
# for MySQL, Cassandra and Kafka: connecting, reading, writing, and message
# queue operations, to simplify data migration and processing.
# (Abstract generated automatically by CSDN.)