如何用Python从Oracle数据库高效查询数据
要点:
- 数据类型转换OutputTypeHandler,避免Oracle LOB/CLOB等数据类型查询报错
- cursor.arraysize 和cursor.prefetchrows 提高查询速度,一般设置大于查询结果行数。该设置可以降低/减少查询时间(十倍,甚至几十倍的查询速度提高)
import cx_Oracle #cx_Oracle module is imported to provide the API for accessing the Oracle database
import db_config # import db_config.py for the db access username/password/db info
import time
import pandas as pd
import decimal
import copy
start = time.time() # record the time when the code start to run
def OutConverter(value):
''' 1. replace null with blank '''
if value is None:
return ''
return value
def OutputTypeHandler(cursor, name, defaultType, size, precision, scale):
''' 2. Output type handler function '''
if defaultType in (cx_Oracle.DB_TYPE_VARCHAR, cx_Oracle.DB_TYPE_CHAR):
return cursor.var(str, size, arraysize = cursor.arraysize, outconverter=OutConverter)
if defaultType == cx_Oracle.CLOB:
return cursor.var(cx_Oracle.LONG_STRING, arraysize = cursor.arraysize,outconverter=OutConverter)
if defaultType == cx_Oracle.BLOB:
return cursor.var(cx_Oracle.LONG_BINARY, arraysize = cursor.arraysize,outconverter=OutConverter)
if defaultType == cx_Oracle.NUMBER:
return cursor.var(decimal.Decimal, arraysize=cursor.arraysize, outconverter=OutConverter)
if defaultType == cx_Oracle.TIMESTAMP:
return cursor.var(str, arraysize = cursor.arraysize,outconverter=OutConverter)
def oracle_query(sql_statement):
''' 3. Query Oracle Database'''
connection = cx_Oracle.connect(db_config.user, db_config.pw, db_config.dsn)
connection.outputtypehandler = OutputTypeHandler
cursor = connection.cursor()
cursor.arraysize = 1000
cursor.prefetchrows = 1000
cursor.execute(sql_statement)
# 3.1. Query Column info - colunm name/type/...
for column in cursor.description: print(column)
# 3.2. Changing Query Results with Rowfactories
columns = [col[0] for col in cursor.description]
cursor.rowfactory = lambda *args: dict(zip(columns, args))
data = cursor.fetchall()
# 3.4. Covert a Diction List Data to a two-dimensional data structure table
#result = pd.DataFrame(data)
result = data
#print(result.tail())
return result
def save_to_excel(data1,excel_name,sheet_name):
'''4. Save data to a Excel'''
print("Start write data to Excel........")
data = pd.DataFrame(data1)
data.to_excel(excel_name, sheet_name, index=0)
print("Finish write to a Excel: " + excel_name+ "\n")
sql_statement = '''SELECT * from table_name'''
data = oracle_query(sql_statement)
elapsed = (time.time() - start)
print(">>> The code running: ",elapsed, "Seconds")