import xlrd as xr
import os,pickle
import cx_Oracle
import sys,re,datetime,csv
from xlrd import xldate_as_datetime, xldate_as_tuple
import time
def trim(s):
r = re.findall('[\S]+', s)
print ("替换:"+s)
return " ".join(r)
def query_sql(v_sql):
conn = cx_Oracle.connect('user/pass@host:ip/sid')
c = conn.cursor() # 获取cursor
try:
# 解析sql语句
c.parse(v_sql)
# 捕获SQL异常
except cx_Oracle.DatabaseError as e:
print(e)
c.execute(v_sql) # 使用cursor进行各种操作
row = c.fetchall() # 可以调用cursor.fetchall()一次取完所有结果,或者cursor.fetchone()一次取一行结果
c.close() # 关闭cursor
conn.close() # 关闭连接
return row
def insert_batch_sql(v_sql, data):
conn = cx_Oracle.connect('user/pass@host:ip/sid')
c = conn.cursor()
try:
c.parse(v_sql)
except cx_Oracle.DatabaseError as e:
print(e)
c.executemany(v_sql,data) #批量写入
conn.commit()
c.close()
conn.close()
def insert_log(log_msg):
conn = cx_Oracle.connect('user/pass@host:ip/sid')
c = conn.cursor()
dt =datetime.datetime.now()
param = {'rzxx': log_msg,'rzsj':dt}
c.execute('insert into t_log values(seq_t.nextval,:rzxx,:rzsj)', param);
conn.commit()
c.close()
conn.close()
#excel写入数据,检查数据库表的列数和excel是否一致,不一致的跳过
def excel_check_and_imp(filename,tablename,col_nums):
data = xr.open_workbook(filename)
sheets = data.sheet_names()
for sheet in sheets:
table = data.sheet_by_name(sheet)
ncols = table.ncols
if ncols != col_nums:
log_msg = "[跳过原因2]文件:["+filename+"],sheet["+sheet+"]和表["+tablename+"]列数不一致"
print("文件:[%s],sheet[%s]和表[%s]列数不一致,跳过"%(filename,sheet,tablename))
print(table.row_values(1))
insert_log(log_msg)
return -1
else:
batch_to_ora(filename,tablename)
return 1
#检查路径下是否有excel文件
def fold_files_check(path):
if os.path.exists(path):
filenames = os.listdir(path)
for i,filename in enumerate(filenames):
if i == 0:
iSpecialFile = i + 1
sFileName = filename
print('==================正在检查第%s个文件=========================' % (i + 1))
if sFileName.find(".xls") <0 :
log_msg="[跳过原因1]文件:[" + filename + "]为非excel文件"
print('文件:[%s]为非excel文件,跳过' % (filename))
insert_log(log_msg)
continue
else:
log_msg = "文件:[" + (path+"\\"+filename) + "]文件格式检查通过,开始导入"
print('文件:[%s]文件格式检查通过' % (path+"\\"+filename))
insert_log(log_msg)
#通过filename确定表名
list_table=['T1','T2','T3','T4','T5','T6','T7','T8','T9','T10','T11','T12','T13','T14','T15','T16','T17','T18','T19','T20','T21','T22','T23']
tablename=(list_table[int(filename[0:2])-1])
# 获取表的列数
v_sys_sql = "select count(1) from sys.dba_tab_cols a where a.owner='RAM' AND A.TABLE_NAME='" + tablename.upper() + "'"
cols_name = query_sql(v_sys_sql)
for col in cols_name:
col_nums = col[0]
#检查通过的进行导入
if excel_check_and_imp(path+"\\"+filename,tablename,col_nums) <0 :
continue
else:
print('文件夹:(%s)不存在!' % (path))
def batch_to_ora(filename,tablename):
#读excel
data = xr.open_workbook(filename)
sheets = data.sheet_names()
for sheet in sheets:
table = data.sheet_by_name(sheet)
#table = data.sheets()[0]
nrows = table.nrows
ncols = table.ncols
#准备变量
list = [] #批量写入
num = 0
#插入表的SQL
sql = "INSERT INTO :tab :cols VALUES :vals "
#查询系统视图,获取列名,并组装SQL
v_sys_sql = "select listagg(column_name,',') WITHIN GROUP (order by column_id),listagg(':'||column_id,',') WITHIN GROUP (order by column_id) from sys.dba_tab_cols a where a.owner='RAM' AND A.TABLE_NAME='"+tablename.upper()+"'"
cols_name = query_sql(v_sys_sql)
for col in cols_name:
cols = "(" + col[0] + ")"
vals = "(" + col[1] + ")"
#获取所有数据类型列表
v_sys_sql2 = "select DATA_TYPE from sys.dba_tab_cols a where a.owner='RAM' AND A.TABLE_NAME='" + tablename.upper() + "'"
coltype = []
col_types = query_sql(v_sys_sql2)
for i in range(len(col_types)):
str_type=str(col_types[i])
if str_type.find("VARCHAR2")>0:
coltype.append("VARCHAR2")
elif str_type.find("NUMBER")>0:
coltype.append("NUMBER")
elif str_type.find("DATE")>0:
coltype.append("DATE")
#组装SQL
sql=sql.replace(":cols", cols).replace(":vals", vals).replace(":tab",tablename)
# for iRow in range(1, nrows):
# for iCol in range(ncols):
# sCell = table.cell_value(iRow, iCol)
# # ctype: 0 empty,1 string, 2 number, 3 date, 4 boolean, 5 error
# ctype = table.cell(iRow, iCol).ctype
# if ctype == 3:
# date = xr.xldate_as_datetime(sCell,data.datemode)
#批量写入,6000一批
excel_cols_num_imp = 0
for i in range(1,nrows):
tab_row = table.row_values(i)
for j, x in enumerate(tab_row):
if table.cell(i, j).ctype==3:
tab_row[j]=xr.xldate_as_datetime(tab_row[j],data.datemode).strftime('%Y-%d-%m %H:%M:%S')
if x=='':
tab_row[j] = None
# if table.cell(i, j).ctype==1 and trim(x)== '':
# tab_row[j] = None
# elif coltype[j] == 'NUMBER':
# if trim(x) == '':
# tab_row[j]=None
#如果数据类型为varchar,并且excel为科学技术法,则进行转换
elif coltype[j] == 'VARCHAR2':
try:
v=str(tab_row[j])
#科学计数法转换成字符串
if v.find("e+") != -1:
v = "%.f" % float(tab_row[j])
tab_row[j] = str(v)
except Exception as e:
tab_row[j] = str(tab_row[j])
#如果为number,且excel为字符,则强制转换为Int类型
#TypeError: expecting string or bytes object:一般出现这个问题,是因为excel读出来为'96.99',带引号的,要使用float转一下
elif coltype[j] == 'NUMBER':
try:
v = str(tab_row[j])
if v.find(".") != -1:
v = "%.2f" % float(tab_row[j])
tab_row[j] = float(v)
else:
tab_row[j] = int(v)
except Exception as e:
tab_row[j] = str(tab_row[j])
list.append(tab_row)
print(tab_row)
num += 1
if num>=batch_limit:
print("正在导入文件:[%s],sheet:[%s]总共[%s]行,已导入[%s]行" % (filename,sheet, nrows - 1, excel_cols_num_imp))
insert_batch_sql(sql, list)
excel_cols_num_imp = excel_cols_num_imp + num
num = 0
list.clear()
log_msg = "文件:[" + filename + "],sheet["+sheet+"]总共[" + str(nrows-1)+ "]行,已导入["+str(excel_cols_num_imp)+"]行"
print("文件:[%s],sheet:[%s]总共[%s]行,已导入[%s]行"%(filename,sheet,nrows-1,excel_cols_num_imp))
insert_log(log_msg)
#剩下的小于批量数的再次插入
print("正在导入文件:[%s],sheet:[%s]总共[%s]行,已导入[%s]行,导入完成" % (filename,sheet, nrows - 1, excel_cols_num_imp))
insert_batch_sql(sql, list)
excel_cols_num_imp = excel_cols_num_imp + num
list.clear()
log_msg = "文件:[" +filename+ "总共" + str(nrows-1) + "]行,已导入[" + str(excel_cols_num_imp) + "]行,导入完成"
print("文件:[%s],sheet:[%s]总共[%s]行,已导入[%s]行,导入完成" % (filename, sheet,nrows-1, excel_cols_num_imp))
insert_log(log_msg)
batch_limit = 10000
fold_files_check("C:\\Users\\sp\\Desktop\\data\\******")
#fold_files_check("C:\\test")
# if __name__=="__main__":
# cmd=sys.argv[1]
# p1=cmd.split(",")[0]
# fold_files_check(p1)