1.1python连接mysql数据库
import pandas as pd
import mysql.connector
config = {
'user': '用户名',
'password': '密码',
'host': '地址',
'database': '数据库名',
'raise_on_warnings': True,
}
cnx = mysql.connector.connect(**config)
query_1 =
"""
SELECT * FROM table
"""
df_query_1= pd.read_sql_query(query_1, cnx)
cnx.close()
df_query_1.head()
2.1python连接oracle数据库
import cx_Oracle
import pandas as pd
def execute_sql_oracle(sql):
username = '用户名'
password = '密码'
dsn = '地址:1521/数据库名称'
connection = cx_Oracle.connect(username, password, dsn)
cursor = connection.cursor()
try:
cursor.execute(sql)
column_names = [i[0] for i in cursor.description]
data = cursor.fetchall()
df = pd.DataFrame(data, columns=column_names)
return df
except cx_Oracle.DatabaseError as e:
error, = e.args
print(f"Oracle-Error-Code: {error.code}")
print(f"Oracle-Error-Message: {error.message}")
finally:
if cursor:
cursor.close()
if connection:
connection.close()
sql_query = """
SELECT * FROM table
"""
df = execute_sql_oracle(sql_query)
2.1在Oracle数据库一次只能匹配一千个,用python连接数据库,利用python函数写循环,解决一次匹配大于1000个的问题;(带进度条)
import cx_Oracle
import pandas as pd
from datetime import datetime
from IPython.display import display
from IPython.display import HTML
from ipywidgets import IntProgress, HBox, VBox, Layout
import time
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
def query_oracle_data(order_ids, sql_template):
placeholders = ', '.join([':var{}'.format(i) for i in range(1, len(order_ids) + 1)])
sql = sql_template.format(placeholders)
username = '用户名'
password = '密码'
dsn = '地址:1521/数据库名'
connection = cx_Oracle.connect(f"{username}/{password}@{dsn}")
cursor = connection.cursor()
cursor.execute(sql, order_ids)
data = cursor.fetchall()
header = [t[0] for t in cursor.description]
df = pd.DataFrame(data, columns=header)
cursor.close()
connection.close()
return df
def split_list(lst, n):
for i in range(0, len(lst), n):
yield lst[i:i + n]
def execute_sql(excel_path, sql_template, chunk_size=1000):
df_excel = pd.read_excel(excel_path)
order_ids = df_excel['A'].tolist()
all_dfs = []
for chunk in split_list(order_ids, chunk_size):
df = query_oracle_data(chunk, sql_template)
all_dfs.append(df)
final_df = pd.concat(all_dfs, ignore_index=True)
return final_df
pass
def execute_sql_with_progress(excel_path, sql_template, chunk_size=1000):
start_time = time.time()
df_excel = pd.read_excel(excel_path)
order_ids = df_excel['A'].tolist()
num_chunks = len(order_ids) // chunk_size + (len(order_ids) % chunk_size > 0)
progress = IntProgress(min=0, max=num_chunks, description='Processing:')
display(progress)
all_dfs = []
for i, chunk in enumerate(split_list(order_ids, chunk_size), 1):
df = query_oracle_data(chunk, sql_template)
all_dfs.append(df)
progress.value = i
final_df = pd.concat(all_dfs, ignore_index=True)
progress.value = num_chunks
end_time = time.time()
run_time = end_time - start_time
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
success_message = HTML(f'<p style="color:blue;">运行成功,完成时间:{now},耗时:{run_time:.2f}秒</p>')
display(success_message, display_id='success_message')
return final_df
sql_1 ="""
select *
from table
where a in ({})
"""
excel_path='C:\\Users\\Admin\\Desktop\\要匹配的文件.xlsx'
final_df = execute_sql_with_progress(excel_path, sql_1)
final_df.head()