python office automation
import os
import pandas as pd
## use os to arrive specified address
os.chdir('C:\\Users\\lenovo\\Desktop\\data')
# read and write excel document
name='垂钓装备&绑钩器.xlsx'
df=pd.read_excel(name)
#check data
# df.unique can output one index all unique element of list form
df['日期'].unique()
# calculate sale
df['销售额']=df['转化率']*df['访客数']*df['客单价']
df.head()
# df.groupby play a role which classify '品牌' to sum value
# add colunm
df_sum=df.groupby('品牌')['销售额'].sum().reset_index()
df_sum['行业']='专项户外运动装备&冰爪'
print(df_sum)
# aim at all documents
result=pd.DataFrame() //add a write form
for name in os.listdir():// it can value all document name in the dir
df=pd.read_excel(name) // read and write document
# calculate sale
df['销售额']=df['转化率']*df['访客数']*df['客单价']
df_sum=df.groupby('品牌')['销售额'].sum().reset_index()
df_sum['行业']=name.replace('.xlxs','')
# many df should be connected a df
result=pd.concat([result,df_sum])
# df should use groupby to calculate the total sale of every grand;descending order
df_final=result.groupby('品牌')['销售额'].sum().reset_index().sort_values('销售额',ascending = False)
# it should be changed into scientific notation
pd.set_option('display.float_format',lambda x:'%.2f'%x)
print(df_final)
df_final.head()
思维导图
1.# df should use groupby to calculate the total sale of every grand;descending order
df_sum=df.groupby('品牌')['销售额'].sum().reset_index()
2.# many df should be connected a df
result=pd.concat([result,df_sum])
3. df_sum['行业']=name.replace('.xlxs','')
# this method is good for me
合并求值
对文件夹的操作
多张表格合并