在项目中会碰到需要将excel批量导入数仓的需求,python的xlrd正好简单易用,拿来一试!
import psycopg2
import xlrd
import sys
import time
'''
连接数据库
args:db_name(数据库名称)
returns:db
'''
sys_time = time.ctime()
print(sys_time)
def gp_link():
try:
db = psycopg2.connect(dbname="",
user = "",
password = "",
host = "",
port = "5432")
return db
except:
print("could not connect to Greenplum server")
'''
读取excel函数
args:excel_file(excel文件,目录在py文件同目录)
returns:book
'''
def open_excel(excel_file):
try:
book = xlrd.open_workbook(excel_file) # 文件名,把文件与py文件放在同一目录下
print(sys.getsizeof(book))
return book
except:
print("open excel file failed!")
'''
执行插入操作
args:db_name(数据库名称)
table_name(表名称)
excel_file(excel文件名,把文件与py文件放在同一目录下)
'''
#db = gp_link(db_name) # 打开数据库连接
def store_to(table_name, excel_file):
db = gp_link()
cursor = db.cursor() # 使用 cursor() 方法创建一个游标对象 cursor
book = open_excel(excel_file) # 打开excel文件
sheets = book.sheet_names() # 获取所有sheet表名
for sheet in sheets:
sh = book.sheet_by_name(sheet) # 打开每一张表
row_num = sh.nrows
print(row_num)
list = [] # 定义列表用来存放数据
num = 0 # 用来控制每次插入的数量
for i in range(1, row_num): # 第一行是标题名,对应表中的字段名所以应该从第二行开始,计算机以0开始计数,所以值是1
row_data = sh.row_values(i) # 按行获取excel的值
value = (row_data[0], row_data[1], row_data[2], row_data[3])
list.append(value) # 将数据暂存在列表
print(list)
num += 1
if (num >= 10000): # 每一万条数据执行一次插入
#print(sys.getsizeof(list))
sql_tru = "TRUNCATE TABLE " + table_name
sql = "INSERT INTO " + table_name + " (area, 团队, name, 职级) VALUES(%s,%s,%s,%s)"
print(sql_tru)
print(sql)
cursor.executemany(sql,list) # 执行sql语句
num = 0 # 计数归零
list.clear() # 清空list
print("worksheets: " + sheet + " has been inserted 1 datas!")
print("worksheets: " + sheet + " has been inserted " + str(row_num) + " datas!")
db.commit() # 提交
db.close() # 关闭连接
db.close()
if __name__ == '__main__':
store_to('table_name', 'D:\python_file\excel2gp\\test.xlsx')