在第一章从CSV中获取数据后,需要将提取的数据或经Pandas等处理后的数据进行存储。这里简单介绍下使用cx-Oracle操作数据库,和通过Oracle的数据载入工具sqlldr来匹配导入数据。
1.cx-Oracle
1.1.安装:
pip install cx_Oracle
1.2.安装oracle客户端,并添加到Path
oracle客户端下载地址
(安装过程,及Linux环境变量过程,略)
1.3.示例代码
操作Oracle工具 oracle_utils.py
class ConnectOracleDatabase(object):
def __init__(self):
self.user = DB_USER
self.password = DB_PASSWORD
self.ip = DB_IP
self.port = DB_PORT
self.service_name = DB_SERVICE
self.oci_path = ORACLE_OCI_DIR
# 这种设置环境变量,在Pycharm中运行无效
# 在菜单Run->Edit configurations 中,手动设置Environment variables,添加LD_LIBRARY_PATH的内容,即可解决问题。
# LD_LIBRARY_PATH=/home/oracle/instantclient_19_5
sys_version = platform.architecture()
if sys_version[1] is not "WindowsPE":
os.environ["LD_LIBRARY_PATH"] = self.oci_path
else:
os.environ["PATH"] = os.environ["PATH"] + ";" + self.oci_path
# 解决中文乱码问题
os.environ["NLS_LANG"] = "SIMPLIFIED CHINESE_CHINA.ZHS16GBK"
self.db_conn = None
self.db_cursor = None
self.create_connect()
def create_connect(self):
try:
self.db_conn = cx_Oracle.connect(self.user, self.password,
"%s:%s/%s" % (self.ip,
self.port,
self.service_name))
self.db_cursor = self.db_conn.cursor()
except Exception as error:
print("Error creating database connection: %s" % str(error))
# 执行DDL语句
def exec_sql_command(self, sql_command):
if sql_command:
print("ddl sql is", sql_command)
query_result = self.db_cursor.execute(sql_command)
return query_result
# 返回SQL的所有值
def query_sql(self, sql):
self.db_cursor.execute(sql)
return self.db_cursor.fetchall()
def __enter__(self):
self.create_connect()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
try:
self.db_cursor.close()
self.db_conn.close()
except AttributeError as error:
print(str(error))
使用示例:
with ConnectOracleDatabase() as oracle:
oracle.exec_sql_command("create table person(id number(10),name varchar2(255))")
2.Python执行sqlldr入库
cx-Oracle可以处理少量数据,一旦数据量过大则效率就会降低。所以较大的结果数据入库一般使用数据库自身提供的工具,这里以sqlldr举例:
2.1.示例代码
主要分两部分,一是根据数据模型,生成sqlldr的控制文件。二是由Python直接调用sqlldr。
生成sqlldr控制文件ctl
# 在环境变量ORACLE_CTL_PATH指定的目录中生成ctl文件
ctl_path = os.path.join(os.environ[ORACLE_CTL_PATH], table_name+".ctl")
if not os.path.exists(ctl_path):
with open(ctl_path, "w", encoding="utf-8") as file:
file.write("load data"+os.linesep)
file.write("replace into table " + table_name + os.linesep)
file.write("fields terminated by "" + terminated + """ + os.linesep)
file.write("trailing nullcols (" + os.linesep)
table_conf = zip(table_fields, table_fields_type)
colunms_tmps=[]
for fields, fields_type in table_conf:
colunms_tmp = fields
if "NUMBER" in fields_type.upper():
colunms_tmp = colunms_tmp + " DECIMAL EXTERNAL"
elif "DATE" in fields_type.upper():
colunms_tmp = colunms_tmp + " date "yyyy-mm-dd hh24:mi:ss""
colunms_tmps.append(colunms_tmp)
file.write(",".join(colunms_tmps)+" TERMINATED BY WHITESPACE )" + os.linesep)
调用sqlldr数据入库
# 判断文件编码 ,直接结局sqlldr导入中文乱码问题
with open(data, "rb") as f:
filedata = f.read()
print("file infos is", chardet.detect(filedata))
if "GB" in chardet.detect(filedata)["encoding"]:
os.environ["NLS_LANG"] = "SIMPLIFIED CHINESE_CHINA.ZHS16GBK"
else:
os.environ["NLS_LANG"] = "AMERICAN_AMERICA.AL32UTF8"
# 导入数据
sqllod_command = os.environ["ORACLE_HOME"]+"/sqlldr userid="+DB_USER+"/"+DB_PASSWORD+TNSNAME
sqllod_command += " control=" + ctl_path
sqllod_command += " data=" + data
sqllod_command += " log=" + os.path.join(os.environ[ORACLE_LOG_PATH], table_name+".log")
sqllod_command += " skip_index_maintenance=true readsize=15000000 streamsize=15000000 "
"columnarrayrows=5000 rows=1000 errors=1000000 direct=true skip=1"
print("sqllod_command=", sqllod_command)
print(os.environ["NLS_LANG"])
os.system(sqllod_command)
更多文章,请关注: