本文的亮点:一是批量插入,二是动态建表,三是动态插入,批量大小你可以自己设置。因为 Linux 版本下无法完整截图,这里直接贴出在 Windows 下开发的代码。
直接上代码:
import pymysql
import os
import time
# Root directory holding the data files to import.
source_dir="D:\\mysqlfile" #change to the Linux directory when deploying
# Module-level MySQL connection shared by every function below.
# NOTE(review): credentials are hard-coded in source — consider loading
# them from environment variables or a config file instead.
conn = pymysql.connect(
host="47.75.252.128",
port=6304,
user="kxd_push",
password="nOc08YM3G",
db="push_data",
charset="utf8")
def queryData(plan_dir):
    """Fetch the comma-separated field-definition string for a plan.

    Looks up ``hago_push_plan_fieldname`` by plan id and today's date
    (``YYYYMMDD``, local time).

    Args:
        plan_dir: plan id (the plan's directory name).

    Returns:
        The fieldname string (e.g. ``"uid|int,name|string"``), or
        ``None`` when no row matches or the query fails.
    """
    try:
        cursor = conn.cursor()
        # Parameterized query: fixes the SQL-injection risk and the
        # missing space before "and" in the original string-built SQL.
        sql = ("select fieldname from hago_push_plan_fieldname "
               "where planid=%s and dt=%s")
        dt = time.strftime('%Y%m%d', time.localtime())
        cursor.execute(sql, (plan_dir, dt))
        row = cursor.fetchone()
        # fetchone() returns None on no match; the original would have
        # raised TypeError on fieldname[0].
        return row[0] if row else None
    except Exception as e:
        print(e)
        return None
def createTable(tableName, plan_dir):
    """Create ``tableName`` from the plan's field definition.

    Each field spec has the form ``"name|type"``. The first field
    creates the table; every later field is added via ``ALTER TABLE``.

    Args:
        tableName: name of the MySQL table to create (internal, not
            user-supplied — identifiers cannot be parameterized).
        plan_dir: plan id used to look up the field definition.

    Returns:
        Number of fields processed, or ``None`` on error.
    """
    # Source type -> MySQL column type. Unknown types fall back to
    # varchar(255); the original left fieldType undefined (NameError)
    # or carried a stale value from the previous field in that case.
    type_map = {
        "int": "int",
        "double": "decimal(8,4)",
        "float": "decimal(8,4)",
        "string": "varchar(255)",
    }
    try:
        cursor = conn.cursor()
        result = queryData(plan_dir)
        fields = result.split(',')
        for order, item in enumerate(fields):
            field, _, ftype = item.partition('|')
            field_type = type_map.get(ftype, "varchar(255)")
            if order == 0:
                cursor.execute("CREATE TABLE IF NOT EXISTS %s (%s %s)"
                               % (tableName, field, field_type))
            else:
                cursor.execute("alter table %s add %s %s"
                               % (tableName, field, field_type))
        return len(fields)
    except Exception as e:
        print(e)
        return None
def insertBatch(filepath, tableName, fieldlen):
    """Bulk-insert a comma-separated file into ``tableName`` in batches.

    Args:
        filepath: path of the file; one comma-separated row per line.
        tableName: target table; its column count must equal fieldlen.
        fieldlen: number of columns per row.

    Returns:
        Total number of rows inserted (0 on error — the original
        returned None here, which crashed the caller's ``+=``).
    """
    try:
        cursor = conn.cursor()
        total = 0          # rows inserted from this file
        batchsize = 5      # rows per executemany/commit; tune as needed
        rows = []
        placeholders = ",".join(["%s"] * fieldlen)
        sql = "insert into {tablename} values({value})".format(
            tablename=tableName, value=placeholders)
        with open(filepath) as f:
            for line in f:
                # rstrip('\n'): the original kept the trailing newline
                # inside the last column's value.
                rows.append(line.rstrip('\n').split(','))
                if len(rows) >= batchsize:
                    total += cursor.executemany(sql, rows)
                    conn.commit()
                    rows.clear()
        if rows:  # flush the final partial batch only when non-empty
            total += cursor.executemany(sql, rows)
            conn.commit()
        return total
    except Exception as e:
        print(e)
        return 0
def getPlanGroupDir(dir_path):
    """Walk ``dir_path`` and list the plan dir plus "g"-prefixed groups.

    Expected layout: ``dir_path/<plan_dir>/<group_id>/...``. The first
    directory visited after the root is taken as the plan directory;
    every directory visited after that is a group directory.

    Args:
        dir_path: root directory containing one plan directory.

    Returns:
        ``[plan_dir, "g"+group1, "g"+group2, ...]``.
    """
    group_dir_list = []
    count = 1
    for root, dirs, files in os.walk(dir_path, topdown=True):
        # os.path.basename works on both Windows and Linux, replacing
        # the hard-coded root.split("\\") that had to be hand-edited
        # per OS in the original.
        base = os.path.basename(root)
        if count == 2:
            group_dir_list.append(base)
        elif count > 2:
            group_dir_list.append("g" + base)
        count += 1
    return group_dir_list
if __name__ == "__main__":
    # dir_list = [plan_dir, "g"+group1, "g"+group2, ...]
    dir_list = getPlanGroupDir(source_dir)
    plan_dir = dir_list[0]
    result = queryData(plan_dir)
    try:
        for groupid in dir_list[1:]:
            fieldLen = createTable(groupid, plan_dir)  # one table per group
            # The on-disk group dir has no "g" prefix -> strip it back off.
            # os.path.join is portable; the original "\\" concatenation
            # only worked on Windows.
            group_dir = os.path.join(source_dir, plan_dir, groupid[1:])
            group_num = 0  # rows inserted for this group id
            for root, subdirs, files in os.walk(group_dir, topdown=True):
                for name in files:
                    # Join with root (not group_dir) so files in nested
                    # subdirectories get their correct full path.
                    file_path = os.path.join(root, name)
                    # "or 0": insertBatch may return None on error.
                    group_num += insertBatch(file_path, groupid, fieldLen) or 0
            print("groupid:{value} 条数 :{num}".format(value=groupid, num=group_num))
    finally:
        conn.close()  # close even when a group fails mid-way
----------------------------------------------------------------
其实可以把这些代码整理得更好看,比如封装成一个类,但没必要,好用即可。如果想知道如何在 Python3 虚拟环境里跑数据,点赞数超过 100,我就再更新得详细一点;提示一下,用 shell 脚本控制即可。