问题
在项目中遇到一个问题,需要从文本中读取三万八千条数据写入mysql数据库,文件中为用@分隔的sql语句,但是在读取的过程中发现速度过慢,三万八千条数据需要220秒,问题代码片段如下:
def read_to_mysql(filecata, targetDir):
    """Execute the '@'-separated SQL statements stored in a text file
    against the MySQL database configured in config.ini, then move the
    processed file to targetDir.

    :param filecata: path of the text file holding the SQL statements
    :param targetDir: directory the processed file is moved into
    :return: None
    """
    root_dir = os.path.abspath(os.path.join(os.getcwd(), "./"))
    config = configparser.ConfigParser()
    config.read(root_dir + "/config.ini")
    host = config.get("DatabaseOfWRT", "host")
    database_name = config.get("DatabaseOfWRT", "database")
    user_name = config.get("DatabaseOfWRT", "username")
    user_password = config.get("DatabaseOfWRT", "password")
    charset = config.get("DatabaseOfWRT", "charset")
    conn = pymysql.connect(
        host=host,
        user=user_name, password=user_password,
        database=database_name,
        charset=charset
    )
    try:
        cursor = conn.cursor()
        with open(filecata, "r", encoding='utf-8') as f:
            data = f.read()
        # The file ends with a trailing '@', so the last split element is
        # an empty fragment, not a statement — drop it.
        statements = data.split('@')
        del statements[-1]
        for stmt in statements:
            cursor.execute(stmt + ';')
        # Commit once after all statements: committing inside the loop
        # (once per statement) was the measured bottleneck (~220 s for 38k rows).
        conn.commit()
        copy_del_file(filecata, targetDir)  # moves the processed file away
        cursor.close()
    finally:
        # Always release the connection, even if an execute fails.
        conn.close()
解决方案
经测试发现,影响速度的主要原因是commit(),因为每隔几秒提交一次即可,但是因为单次提交的语句总长度有限制,所以要设置一个合理的提交间隔,代码修改如下:
def read_to_mysql(filecata, targetDir):
    """Execute the '@'-separated SQL statements stored in a text file
    against the MySQL database configured in config.ini, committing in
    timed batches, then move the processed file to targetDir.

    :param filecata: path of the text file holding the SQL statements
    :param targetDir: directory the processed file is moved into
    :return: None
    """
    root_dir = os.path.abspath(os.path.join(os.getcwd(), "./"))
    config = configparser.ConfigParser()
    config.read(root_dir + "/config.ini")
    host = config.get("DatabaseOfWRT", "host")
    database_name = config.get("DatabaseOfWRT", "database")
    user_name = config.get("DatabaseOfWRT", "username")
    user_password = config.get("DatabaseOfWRT", "password")
    charset = config.get("DatabaseOfWRT", "charset")
    conn = pymysql.connect(
        host=host,
        user=user_name, password=user_password,
        database=database_name,
        charset=charset
    )
    try:
        cursor = conn.cursor()
        with open(filecata, "r", encoding='utf-8') as f:
            data = f.read()
        # The file ends with a trailing '@', so the last split element is
        # an empty fragment, not a statement — drop it.
        statements = data.split('@')
        del statements[-1]
        last_commit = time.time()
        for stmt in statements:
            cursor.execute(stmt + ';')
            # Commit roughly every 10 seconds. Use '>=' rather than '==':
            # with the original equality test, a slow execute that makes
            # the elapsed time jump from 9 s straight to 11 s would skip
            # the only matching value, the timer would never be reset, and
            # every later periodic commit would be silently lost.
            if time.time() - last_commit >= 10:
                conn.commit()
                last_commit = time.time()
        conn.commit()  # flush the final partial batch
        copy_del_file(filecata, targetDir)  # moves the processed file away
        cursor.close()
    finally:
        # Always release the connection, even if an execute fails.
        conn.close()
此时写入三万八千条数据需要9秒