前言
我们经常会遇到我们对数据库中数据进行拉取并处理脚本,我们如果要操作线上数据库。用java操作就过于麻烦,这个时候我们更方便的方法是直接通过一个python脚本去读取数据库,并进行处理数据。那么我们可以将读取数据库数据,写入txt文件,excel文件,读取txt文件,excel文件作为一个标板。用于后续直接修改文件内容
实践
我们在项目中创建如下文件夹文件
conf-dev.ini 配置文件
[db-config]
host = localhost
port = 3306
username = root
password = 123456
database = mybatis
[file-config]
file_name=net_wall_dev.xlsx
我们通过创建ReadConfig.py环境配置读取配置文件内容
# -*- coding: utf-8 -*-#
#-------------------------------------------------------------------------------
# Name: ReadConfig
# Description:
# Author: Administrator
# Date: 2022/1/15
#-------------------------------------------------------------------------------
import configparser
import os
class ReadConfig:
def __init__(self,env=None):
if env:
env = env
else:
env= "dev"
print("env="+env)
#root_dir=os.path.abspath(".") #当前目录的绝对路径
#root_dir=os.path.abspath(r"..") #上级目录的绝对路径
root_dir=os.path.dirname(os.path.abspath(__file__))
config_name="config-{}.ini".format(env)
config_path= os.path.join(root_dir,config_name)
print("config_path="+config_path)
self.cf = configparser.ConfigParser()
self.cf.read(config_path)
def get_db_config(self,param):
value=self.cf.get("db-config",param)
return value
def get_file_config(self,param):
value=self.cf.get("file-config",param)
return value
if __name__ == '__main__':
test= ReadConfig("dev")
host=test.get_db_config("database")
print("database:"+host)
file_name= test.get_file_config("file_name")
print("file_name:"+file_name)
创建db.py 读取数据库数据到excel 文件,txt 文件当中
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: ReadConfig
# Description:
# Author: Administrator
# Date: 2022/1/9
# -------------------------------------------------------------------------------
import importlib
import logging
import os
import sys
import pymysql
import xlrd
import xlsxwriter
from sre_python.common.ReadConfig import ReadConfig
importlib.reload(sys)
# env = sys.argv[1]
env = "dev"
# 读txt文件
def read_file(file_path):
f = open(file_path, 'r',encoding='utf-8')
result = list()
for line in f.readlines(): # 逐行读取数据
line = str(line.strip()) # 去掉每行头尾空白
if not len(line) or line.startswith('#'): # 判断是否是空行或注释行
continue # 是的话,跳过不处理
result.append(line) # 保存
result.sort() # 排序结果
return result
# 写txt文件
def write_file(file_path, result):
f = open(file_path, 'w',encoding='utf-8')
f.write('%s' % '\n'.join(result))
# 读excel(2.0.1 xlrd 版本后只支持xls类型excel)
def read_excel(excel_path, sheet_index, row_index):
data = xlrd.open_workbook(excel_path)
table = data.sheets()[sheet_index]
nrows = table.nrows # table 中的有效行数
source_list = []
for i in range(row_index, nrows):
source_data = table.row_values(i, start_colx=0, end_colx=None)
source_list.append(source_data)
pass
return source_list
def write_excel(column_header, result_list, file_path):
if os.path.exists(file_path):
os.remove(file_path)
pass
workbook = xlsxwriter.Workbook(file_path)
worksheet = workbook.add_worksheet()
# 写入列头
row = 0
col = 0
for header in column_header:
worksheet.write(row, col, str(header))
col += 1
pass
# 写入数据行
col = 0
row += 1
for result in result_list:
for item in result:
worksheet.write(row, col, str(item))
col += 1
pass
col = 0
row += 1
pass
workbook.close()
return "./" + file_path
class DbExcelClient:
def __init__(self, env):
self.config = ReadConfig(env)
self.root_path = os.path.dirname(os.path.dirname(__file__))
def conn_db(self):
host = self.config.get_db_config("host")
port = self.config.get_db_config("port")
username = self.config.get_db_config("username")
password = self.config.get_db_config("password")
database = self.config.get_db_config("database")
# mysql
print(database)
self.conn = pymysql.connect(host=host, user=username, passwd=password, port=int(port), db=database)
# postgres
# self.conn = psycopg2.connect(host=host,user = username,passwd = password,db = database,port =port)
self.cur = self.conn.cursor()
def execute_sql(self, sql_content):
self.cur.execute(sql_content)
self.conn.commit()
return self.cur.fetchall()
def dis_conn(self):
self.cur.close()
self.conn.close()
def execute_no_result_sql(self, sql):
self.cur.execute(sql)
self.conn.commit()
def get_total_count(self, sql):
self.cur.execute(sql)
rows = self.cur.fetchall()
result = None
for row in rows:
result = row[0]
return result
if __name__ == '__main__':
root_dir = os.path.dirname(os.path.abspath(__file__))
print("root_dir:" + root_dir)
log_file = os.path.join(root_dir, "log_file.txt")
logger = logging.getLogger('log')
logger.setLevel(logging.DEBUG)
fmt = "%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s"
formatter = logging.Formatter(fmt)
fh = logging.FileHandler(log_file,encoding='utf-8')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
logger.addHandler(fh)
dbClient = DbExcelClient(env)
dbClient.conn_db()
excel_list = []
file_list = []
rows = dbClient.execute_sql("select username,birthday from user")
total = dbClient.get_total_count("select count(*) from user")
logger.info("msage total is" + str(total))
for row in rows:
excel_list.append((row[0], row[1]))
file_list.append(str(row[0]) + ":" + str(row[1]))
pass
write_file("db_text.txt", file_list)
column_header = ["姓名", "生日"]
write_excel(column_header, excel_list, "db_excel.xlsx")
new_file_list = read_file("db_text.txt")
for row in new_file_list:
logger.info("file_row:" + row)
# 读excel(2.0.1 xlrd 版本后只支持xls类型excel)
new_excel_list = read_excel("db_excel_xls.xls", 0, 0)
for row in new_excel_list:
logger.info("excel_row:" + row[0] + "excel_row:" + row[1])
dbClient.dis_conn()