1、基础方法:
# 模块名字
# py_sql_excel.py
import pandas as pd
import pymysql
import openpyxl
class sql_excel:
# 通过类的__init__函数,初始化数据
def __init__(self, data_sql, excel_path, col_start, col_end):
self.data_sql = data_sql
self.excel_path = excel_path
self.col_start = col_start
self.col_end = col_end
# sql_data 查询数据库数据
def sql_for(self):
db = ""
dc_dict = {}
try:
# 打开数据库连接
db = pymysql.connect(
host="192.168.10.251",
port=3306,
user="root",
password="*******",
database="dong_hyrd_bi",
charset="utf8",
)
sql = self.data_sql
df_sql = pd.read_sql(sql, db)
# 取出全部行,迭代输出,保存在字典中。
for index, row in df_sql.iterrows():
dc_dict[list(row.values)[0]] = list(row.values)[1:]
# 返回数据库字典
return dc_dict
except:
# Rollback in case there is any error
db.rollback()
finally:
db.close()
# 取出excel的值,迭代输出,保存在字典中(按行迭代输出)
def pc_for(self):
# 路径参数
str_path = self.excel_path
wb_pc = openpyxl.load_workbook(str_path)
ws_pc = wb_pc.active
dictA_pc = {}
dict_tile_pc = ""
# 指定区域中的位置
col_start = self.col_start
col_end = self.col_end
for row in ws_pc[col_start: col_end]:
# 遍历每一行中的每一单元格
x = 0
listS_pc = []
for c in row:
x += 1
if x == 1:
dict_tile_pc = c.value
else:
listS_pc.append(c.value)
dictA_pc[dict_tile_pc] = listS_pc
return dictA_pc
# 数据库sql与excel对比
def dict_compare(self):
global key
# 保存缺少行value(错误信息保存)
listS = []
# 保存缺少行名(错误信息保存)
listL = []
str1 = self.pc_for()
str2 = self.sql_for()
# 1、先输出一个其中一个字典的key和values
# 2、直接拿着这个values和另一个字典中的的values对比。
# 3、str2[key][list_i]可以先通过key,拿到一个字典的value
# 4、两个value的值确定了,就可以比较了。把错误信息保存在list列表中。
try:
for key, value in str1.items():
for list_i, list_value in enumerate(value):
if list_value != str2[key][list_i]:
li_error = key, ":", list_value
listS.append(li_error)
except:
listL.append(key)
# 判断excel是否定位正确
try:
if key is None:
return "excel 定位错误"
else:
return "行value错误:", listS, "缺少行名:", listL
except:
return "excel 定位错误"
2、普通执行方法:
# 模块名字
# py_sql_excel_result.py
import srcd.t_data.py_sql_excel as pyResult
sql = "select *, sum(ta.20GP+ta.20RF+ta.40RF+ta.40HC+ta.40RH) as 'TEU', " \
"concat((Round(((sum(ta.20GP+ta.20RF+ta.40RF+ta.40HC+ta.40RH)/" \
"sum(ta.20GP+ta.20RF+ta.40RF+ta.40HC+ta.40RH+ta.20n+ta.40n+ta.tsn))*100),2)),'%') as 'zhanbi' " \
"from( " \
"select t.company_name as 'company_n1',SUM(t.actual_num) as 'actual_n1', " \
"sum(CASE t.boxType WHEN '20GP' THEN 1 ELSE 0 END) as '20GP'," \
"sum(CASE t.boxType WHEN '20RF' THEN 1 ELSE 0 END) as '20RF', " \
"sum(CASE t.boxType WHEN '40RF' THEN 1 ELSE 0 END) as '40RF', " \
"sum(CASE t.boxType WHEN '40HC' THEN 1 ELSE 0 END) as '40HC', " \
"sum(CASE t.boxType WHEN '40RH' THEN 1 ELSE 0 END) as '40RH', " \
"sum(CASE t.boxType WHEN '20' THEN 1 ELSE 0 END) as '20n', " \
"sum(CASE t.boxType WHEN '40' THEN 1 ELSE 0 END) as '40n', " \
"sum(CASE t.boxType WHEN '特殊' THEN 1 ELSE 0 END) as 'tsn' " \
"from dm_container_volume t " \
"GROUP BY t.company_name) ta " \
"group by ta.company_n1"
str_path = "./dataFile/pc_for.xlsx"
col_start = "B3"
col_end = "M4"
result_se = pyResult.sql_excel(sql, str_path, col_start, col_end).dict_compare()
print(result_se)
3、参数执行方法:
# 模块名字
# py_sql_excel_result_other.py
import srcd.t_data.py_sql_excel as pyResult
import time
# 用for循环通过去字典的k,v值,来对比多张不同维度的报表
zd_str_path = {"company_name": "./dataFile/pc_for.xlsx"}
for zd, str_path in zd_str_path.items():
sql = "select *, sum(ta.20GP+ta.20RF+ta.40RF+ta.40HC+ta.40RH) as 'TEU', " \
"concat((Round(((sum(ta.20GP+ta.20RF+ta.40RF+ta.40HC+ta.40RH)/" \
"sum(ta.20GP+ta.20RF+ta.40RF+ta.40HC+ta.40RH+ta.20n+ta.40n+ta.tsn))*100),2)),'%') as 'zhanbi' " \
"from( " \
f"select t.{zd} as 'company_n1',SUM(t.actual_num) as 'actual_n1', " \
"sum(CASE t.boxType WHEN '20GP' THEN 1 ELSE 0 END) as '20GP'," \
"sum(CASE t.boxType WHEN '20RF' THEN 1 ELSE 0 END) as '20RF', " \
"sum(CASE t.boxType WHEN '40RF' THEN 1 ELSE 0 END) as '40RF', " \
"sum(CASE t.boxType WHEN '40HC' THEN 1 ELSE 0 END) as '40HC', " \
"sum(CASE t.boxType WHEN '40RH' THEN 1 ELSE 0 END) as '40RH', " \
"sum(CASE t.boxType WHEN '20' THEN 1 ELSE 0 END) as '20n', " \
"sum(CASE t.boxType WHEN '40' THEN 1 ELSE 0 END) as '40n', " \
"sum(CASE t.boxType WHEN '特殊' THEN 1 ELSE 0 END) as 'tsn' " \
"from dm_container_volume t " \
f"GROUP BY t.{zd}) ta " \
"group by ta.company_n1"
col_start = "B3"
col_end = "M4"
result_se = pyResult.sql_excel(sql, str_path, col_start, col_end).dict_compare()
time.sleep(2)
print(result_se)