###测试成功版
import xlsxwriter
import openpyxl
import pandas as pd
import numpy as np
empty_df = pd.DataFrame()
empty_df.to_excel('merged_workbook.xlsx',index=False)
df1 = pd.read_excel('workbook1.xlsx',sheet_name='销货明细',dtype=str)
df2 = pd.read_excel('workbook2.xlsx',sheet_name='Sheet1',dtype=str)
df2 = df2.dropna(subset=['Batch'])
new_orders = df2[~df2['Batch'].isin(df1['Batch'])]
df1 = pd.concat([df1,new_orders[['Batch','Customer PO','Sold-To Name1','Delivery Date','DeliveryNo','Material','CustDeviceID','Die Qty','Ship-To','Reference Order','WaferID',]]])
df1.astype("string")
df1['Delivery Date'] = df1['Delivery Date'].str[0:10] #把时分秒去掉,留下日期
df1 = pd.DataFrame(df1)
#计算金额
"""df1['单价(未税)'] = pd.to_numeric(df1['单价(未税)'])"""
new_orders['Unit Price'] = pd.to_numeric(new_orders['Unit Price'])
new_orders['Per'] = pd.to_numeric(new_orders['Per'])
"""df1['收入(未税)'] = pd.to_numeric(df1['收入(未税)'])"""
df1["Die Qty"]=pd.to_numeric(new_orders["Die Qty"])
df1['单价(未税)'] = np.where(new_orders['Per'] == 0,"0",new_orders['Unit Price']/new_orders['Per'])
df1['单价(未税)'] = pd.to_numeric(df1['单价(未税)'])
df1['收入(未税)'] = df1['单价(未税)']*df1["Die Qty"]
#区分服务还是货物销售,按不同税率
df1['金额(含税)'] = df1.apply(lambda row:row['收入(未税)']*1.06 if row['产品类型'] == 'Service'
else row['收入(未税)']*1.13,axis =1)
df1['销售组'] = df2['Sales Org Descriptio']
df1['销售组'] = df1['销售组'].str[4: ]
df3 = pd.read_excel('database.xlsx',sheet_name='产品线base',dtype=str)
df3_renamed = df3.rename(columns={'Product\n产品型号':'New_col','Product Line\n产品线':'经营PL'})
df4 = pd.read_excel('Revenue forecast to Finance.xlsx',sheet_name='20231114',dtype=str)
df4 = df4[['Product\n产品型号','品类']]
df4_renamed = df4.rename(columns={'Product\n产品型号':'New_col','品类':'Platform'})
print(df4_renamed)
#按material号检索产品线
def process_data(row):
if row['Material'].startswith('K'):
return row['CustDeviceID']
else:
return row['Material']
return df1
# 应用自定义函数到DataFrame中的每一行
df1['new_column'] = df1.apply(process_data, axis=1)
df_new = df1
df_new['New_col'] = df1['new_column'].apply(lambda x: x[1:] if x.startswith('P') else x)
df_new.pop('new_column')
f_df = pd.merge(df_new,df3_renamed,on = 'New_col',how='left')
f_df = pd.merge(f_df,df4_renamed,on = 'New_col',how='left')
f_df['产品线']=f_df['New_col']
f_df['经营PL_x']=f_df['经营PL_y']
f_df['Platform_x']=f_df['Platform_y']
ff_df=f_df.drop(columns = ['New_col','经营PL_y','Platform_y'])
ff_df.to_excel('merged_workbook.xlsx',index=False,engine='xlsxwriter')
运行以上代码结果表格中出现好几次重复值。待查找原因。