pandas应用:表格之间的查找与关联

###测试成功版

import xlsxwriter
import openpyxl
import pandas as pd
import numpy as np

empty_df = pd.DataFrame()
empty_df.to_excel('merged_workbook.xlsx',index=False)

df1 = pd.read_excel('workbook1.xlsx',sheet_name='销货明细',dtype=str)
df2 = pd.read_excel('workbook2.xlsx',sheet_name='Sheet1',dtype=str)

df2 = df2.dropna(subset=['Batch'])

new_orders = df2[~df2['Batch'].isin(df1['Batch'])]

df1 = pd.concat([df1,new_orders[['Batch','Customer PO','Sold-To Name1','Delivery Date','DeliveryNo','Material','CustDeviceID','Die Qty','Ship-To','Reference Order','WaferID',]]])
df1.astype("string")
df1['Delivery Date'] = df1['Delivery Date'].str[0:10]  #把时分秒去掉,留下日期

df1 = pd.DataFrame(df1)

#计算金额

"""df1['单价(未税)'] = pd.to_numeric(df1['单价(未税)'])"""
new_orders['Unit Price'] = pd.to_numeric(new_orders['Unit Price'])
new_orders['Per'] = pd.to_numeric(new_orders['Per'])
"""df1['收入(未税)'] = pd.to_numeric(df1['收入(未税)'])"""
df1["Die Qty"]=pd.to_numeric(new_orders["Die Qty"])

df1['单价(未税)'] = np.where(new_orders['Per'] == 0,"0",new_orders['Unit Price']/new_orders['Per'])
df1['单价(未税)'] = pd.to_numeric(df1['单价(未税)'])
df1['收入(未税)'] = df1['单价(未税)']*df1["Die Qty"]

#区分服务还是货物销售,按不同税率

df1['金额(含税)'] = df1.apply(lambda row:row['收入(未税)']*1.06 if row['产品类型'] == 'Service' 
                            else row['收入(未税)']*1.13,axis =1)

df1['销售组'] = df2['Sales Org Descriptio']
df1['销售组'] = df1['销售组'].str[4: ]

df3 = pd.read_excel('database.xlsx',sheet_name='产品线base',dtype=str)
df3_renamed = df3.rename(columns={'Product\n产品型号':'New_col','Product Line\n产品线':'经营PL'})

df4 = pd.read_excel('Revenue forecast to Finance.xlsx',sheet_name='20231114',dtype=str)
df4 = df4[['Product\n产品型号','品类']]
df4_renamed = df4.rename(columns={'Product\n产品型号':'New_col','品类':'Platform'})
print(df4_renamed)

#按material号检索产品线
def process_data(row):
    if row['Material'].startswith('K'):
        return row['CustDeviceID']
    else:
        return row['Material']

    return df1
    
# 应用自定义函数到DataFrame中的每一行
df1['new_column'] = df1.apply(process_data, axis=1)
df_new = df1
df_new['New_col'] = df1['new_column'].apply(lambda x: x[1:] if x.startswith('P') else x)
df_new.pop('new_column')

f_df = pd.merge(df_new,df3_renamed,on = 'New_col',how='left')
f_df = pd.merge(f_df,df4_renamed,on = 'New_col',how='left')
f_df['产品线']=f_df['New_col']
f_df['经营PL_x']=f_df['经营PL_y']
f_df['Platform_x']=f_df['Platform_y']
ff_df=f_df.drop(columns = ['New_col','经营PL_y','Platform_y'])

ff_df.to_excel('merged_workbook.xlsx',index=False,engine='xlsxwriter')

运行以上代码结果表格中出现好几次重复值。待查找原因。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值