导入所需要的包,链接数据库
import time
import pandas as pd
import warnings,datetime
import pymysql
import os,glob
warnings.filterwarnings('ignore')
conn = pymysql.connect(host="you datebaes host", user="username", passwd="password", db="db_name", port=9030)
cursor = conn.cursor()
获取表中最大 ID ,因为搭建Doris的技术问题,无法实现ID自增
def get_max_id(datebase_name):
mycursor.execute(f"select MAX(id) from <you_db_name>.{datebase_name};")
result = mycursor.fetchall()
return result[0][0]
获取指定文件夹下子文件夹的文件
folder_paths = r'C:\阿里妈妈'
filenames = os.listdir(folder_paths)
files = [os.path.join(folder_paths,file_name) for file_name in filenames]
for file in files:
file_pathz = glob.glob(file+'/*')
if len(file_pathz) == 1:
shop_data(file_pathz[0])
计算时间差
def ex_date(s):
strp_s = datetime.datetime.strptime(s, '%Y-%m-%d')
d_value = datetime.date.today() + datetime.timedelta(days=-1) - strp_s.date()
return d_value.days
pandas处理数据表格
folder_path = "C:\阿里妈妈\内容营销短直联动_珍致旗舰店_2024-07-08\多多客服-客服数据-客服绩效数据-客服绩效详情.csv"
data = pd.read_csv(folder_path,encoding = 'gb2312')
shop_name = folder_path.split('\\')[2].split('_')[1]
data.rename(columns={'日期': 'RiQi', '场景ID': 'ChangJingId', '场景名字': 'ChangJingMingZi', '计划ID': 'JiHuaId',
'计划名字': 'JiHuaMingZi', '主体ID': 'ZhuTiId', '主体类型': 'ZhuTiLeiXing'}, inplace=True)
data['extratime'] = data['RiQi'].apply(lambda s: ex_date(s))
data.fillna(value=0, inplace=True)
data.fillna(value='', inplace=True)
data.replace('\n', '', regex=True, inplace=True)
data.replace('\t', '', regex=True, inplace=True)
if get_max_id(you_datebase_name) == None:
time_id = 0
else:
time_id = int(get_max_id(you_datebase_name))+1
id_list = [time_id+i for i in range(data.count()[0])]
data['id'] = id_list
data['shop_name'] = shop_name
valuez = str(tuple(data.keys())).replace("'",'`')
sql_text = f"INSERT INTO {datebase} {valuez} VALUES"
for column_name, column_data in data.iterrows():
sql_text += f"{tuple(column_data)},"
cursor.execute(sql_text[:-1])
os.remove(folder_path)