import pandas as pd
import numpy as np
from datetime import date
today=date.today()
downpath='/Users/kangyongqing/Downloads/'
file_shiduan='20230726_144255.csv'
file_quanxian='20230726_144308.csv'
dt1=pd.read_csv(downpath+file_shiduan)
# dt1=pd.read_csv(downpath+file_shiduan,usecols=[''])
dt1=dt1.loc[:,['老师id','月开放小时数','月可用小时']]
print(dt1.shape)
dt1.drop_duplicates(inplace=True)
#删除掉重复的行
print(dt1.shape)
dt2=pd.read_csv(downpath+file_quanxian)
dt2=dt2.loc[dt2['教管']!='DELUSER']
# print(dt1.info())
# print(dt2.info())
# print(dt2.pivot_table(index='教管',values='老师id',aggfunc='count'))
word1=['进阶版学期制小班课 C2']
word2=['标准版学期制小班课 C2']
word3=['拓展版 C1']
word4=['新加坡同步课 C1']
word5=['我的趣味中国之旅C1','我的中华美食课','小小故事绘 C1']
dt3=dt2.loc[sum(dt2['版本'].str.contains(word) for word in word5)>0]
dt4=pd.merge(dt1,dt3,on='老师id',how='inner')
dt4['老师id']=dt4['老师id'].astype(np.int64).astype(str)
print(dt4.info())
renshu=dt4.shape[0]
fabu=dt4['月开放小时数'].sum()
keyong=dt4['月可用小时'].sum()
writer=pd.ExcelWriter(downpath+str(word5[0])+str(today)+'.xlsx',engine='openpyxl')
dt4.to_excel(writer,sheet_name=str(word5[0]),index=False)
ws=writer.book.create_sheet('统计')
# writer.sheets['统计'].add_image(renshu,'E5')
ws.cell(row=2,column=4).value='人数'
ws.cell(row=3,column=4).value=renshu
ws.cell(row=2,column=5).value='月开放小时'
ws.cell(row=3,column=5).value=fabu
ws.cell(row=2,column=6).value='月可用小时'
ws.cell(row=3,column=6).value=keyong
#向指定的单元格插入数据
writer.save()
知识点:
- 数据去重,df.drop_duplicates(inplace=True)
- 包含筛选,及like用法,df.loc[sum(df['字段'].str.contains(word) for word in words)>0]
- 长id数值转换为字符串保存,避免科学计数法显示
- excel创建新的sheet,writer.book.create_sheet(sheet_name)
- 向excel的指定行列插入数值:ws=writer.book.create_sheet(sheet_name),ws.cell(row=,column=).value=值
- merge表连接的使用,merge(表A,表B,on='',how='')