python下excel转csv读取每一个sheet,合并csv,csv批量转入MySQL,更新csv数据插入到MySQL表,MySQL表导出为csv
Excel文件读取每一个sheet保存为csv
import pandas as pd
import xlrd
import os
需要第三方库
import pandas as pd
import xlrd
import os
‘’’
这里用的是pandas一次全部读入
‘’’
def excel2csv(excel_file):
# 打开excel文件
workbook=xlrd.open_workbook(excel_file)
# 获取所有sheet名字
sheet_names=workbook.sheet_names()
namefirst=excel_file.split('\\')[-1]
name=namefirst.split('.')[0]
print(namefirst)
for worksheet_name in sheet_names:
# 遍历每个sheet并用Pandas读取
data_xls=pd.read_excel(excel_file,worksheet_name,encoding='utf-8',index_col=0)
# 获取excel当前目录
dir_path=os.path.abspath(os.path.dirname(excel_file))
# 转换成csv并保存到excel所在目录下的csv文件夹中
csv_path=dir_path+'\\csv\\'
if not os.path.exists(csv_path):
os.mkdir(csv_path)
data_xls.to_csv(csv_path+name+'-'+worksheet_name+'.csv',encoding='utf-8')
#.为当前目录下
excel2csv('./xxxxx.xlsx')
合并csv文件
需要第三方库
import pandas as pd
import os
按文件路径合并
'''
file_path:csv文件所在路径
filename:合并后文件名
'''
def path_connectcsv(file_path,filename):
csvfile=[]
for file in os.listdir(file_path):
if file.endswith('.csv'):
f=pd.read_csv(file)
csvfile.append(f)
'''
axis=0:行合并
axis=1:列合并
'''
data_csv=pd.concat(csvfile,axis=1)
data_csv.to_csv(file_path+'/'+filename+'.csv',index=0,sep=',')
##执行
path_connectcsv(file_path,filenmae)
按文件名合并
'''
filename:需要和并的csv文件名列表
path:需要合并的csv文件位置
'''
def name_connectcsv(filename,path):
csvfile=[]
for file in filename:
filepath=path+file
f=pd.read_csv(filepath)
csvfile.append(f)
data_csv=pd.concat(csvfile,axis=1)
data_csv.to_csv(path+filename+'.csv',index=0,sep=',')
##
name_connectcsv(filename,path)
csv批量转入MySQL
这是在mysql里新建一张表,字段为csv的columns,字段类型自动转为对应的mysql类型,有需要可以更改
需要第三方库
import pymysql
import pandas as pd
class MySQL(object):
'''
host:指定登录ip,自己电脑的一般为localhost
user:mysql用户ing
password:mysql密码
db:数据库名
port:端口号
file:需要导入mysql的csv文件
'''
def __init__(self,host,user,password,db,port,file):
schema = {
'user': user,
'host': host,
'password': password,
'db': db,
'port': port,
}
try:
self.epmall = pymysql.connect(**schema)
print('[Mysql]>>连接成功')
except Exception as e:
# epmall.rollback()
print('[Mysql]>>连接失败')
quit()
else:
self.cur=self.epmall.cursor()
df_old=pd.read_csv(file,encoding='utf-8')
# print(df_old.head())
df_new=pd.read_csv(file,encoding='utf-8')
# print(df_new.head())
##空值填充None
self.df=df_old.astype(object).where(pd.notnull(df_old),None)
columns=self.df.columns.tolist()
'''
ftypes:df中columns对应type
'''
types=self.df.ftypes
make_table=[]
for item in columns:
if "int" in types[item]:
char=item+' '+'INT'
elif 'float' in types[item]:
char=item+' '+'FLOAT'
elif 'object' in types[item]:
char=item+' '+'VARCHAR(255)'
elif 'datetime' in types[item]:
char=item+' '+'DATETIME'
make_table.append(char)
self.result=','.join(make_table)
print(','.join(make_table))
'''
db_name:数据库名字
table_name:在数据库中创建的表名
'''
def csv2mysql(self,db_name, table_name):
# 选择连接database
self.epmall.select_db(db_name)
self.cur.execute('CREATE TABLE IF NOT EXISTS {}({})'.format(table_name,self.result))
values = self.df.values.tolist()
# 根据columns个数
s = ','.join(['%s' for _ in range(len(self.df.columns))])
# executemany批量操作 插入数据 批量操作比逐个操作速度快很多
try:
self.cur.executemany('INSERT INTO {} VALUES ({})'.format(table_name, s), values)
self.epmall.commit()
print('成功')
except Exception as e:
self.epmall.rollback()
print('错误信息:',e)
def __del__(self):
self.cur.close()
self.epmall.close()
a=MySQL(host,user,password,db, port, './xxxxx.csv')
a.csv2mysql(db_name='db',table_name='xxxxx)
更新csv数据插入到MySQL表
表已经在mysql中存在,新数据在csv中,需要插入到已有表更新数据库
需要的第三方库
import pymysql
import pandas as pd
class MySQL(object):
def __init__(self,host,user,password,db,port,file):
schema = {
'user': user,
'host': host,
'password': password,
'db': db,
'port': port,
}
try:
self.epmall = pymysql.connect(**schema)
print('[Mysql]>>连接成功')
except Exception as e:
# epmall.rollback()
print('[Mysql]>>连接失败')
quit()
else:
self.cur=self.epmall.cursor()
df_old=pd.read_csv(file,encoding='utf-8',index_col=0)
print(df_old.head())
# df_new=pd.read_csv(file,encoding='utf-8')
# print(df_new.head())
self.df=df_old.astype(object).where(pd.notnull(df_old),None)
columns=self.df.columns.tolist()
types=self.df.ftypes
make_table=[]
for item in columns:
if "int" in types[item]:
char=item+' '+'INT'
elif 'float' in types[item]:
char=item+' '+'FLOAT'
elif 'object' in types[item]:
char=item+' '+'VARCHAR(255)'
elif 'datetime' in types[item]:
char=item+' '+'DATETIME'
make_table.append(char)
self.result=','.join(make_table)
print(','.join(make_table))
def csv2mysql(self,db_name, table_name):
# 选择连接database
self.epmall.select_db(db_name)
'''
values.tolist:以list的方式返回df的values也就是表内数据
'''
values = self.df.values.tolist()
print(values.head())
# 根据columns个数
s = ','.join(['%s' for _ in range(len(self.df.columns))])
# executemany批量操作 插入数据 批量操作比逐个操作速度快很多
try:
self.cur.executemany('INSERT INTO {} VALUES ({})'.format(table_name, s), values)
self.epmall.commit()
print('成功')
except Exception as e:
self.epmall.rollback()
print('错误信息:',e)
def __del__(self):
self.cur.close()
self.epmall.close()
a=MySQL(host,user,password,db, port, './xxxxx.csv')
a.csv2mysql(db_name='db',table_name='xxxxx')
MySQL表导出为csv
csv的columns为mysql表的字段名
需要的第三方库
import pymysql
from pandas import DataFrame,Series
import pandas as pd
class MySQL(object):
def __init__(self, host, user, password, db, port):
schema = {
'user': user,
'host': host,
'password': password,
'db': db,
'port': port,
}
try:
self.epmall = pymysql.connect(**schema)
print('[Mysql]>>连接成功')
except Exception as e:
# epmall.rollback()
print('[Mysql]>>连接失败')
quit()
else:
self.cur = self.epmall.cursor()
def __del__(self):
self.cur.close()
self.epmall.close()
def createnewmysql(self,sql):
# 将数据转化成DataFrame数据格式
df = pd.read_sql_query(sql, self.epmall)
test_data = DataFrame.from_records(df)
'''
把id设置成行索引
data_1 = data.set_index("id", drop=True)
根据需求
'''
写写入数据数据
pd.DataFrame.to_csv(test_data, "./data/output/csv/data2.csv", encoding="utf_8_sig")
end = time.time()
print('Running time: {} Seconds'.format(end - start))
print("写入成功")
a=MySQL(host,user,password,db, port)
'''
sql查询语句不要中文
'''
a.createnewmysql(sql="查询语句")