利用python 封装函数,计算加法,乘法, 除法贡献度

上篇文章简单介绍了一下加法,乘法,除法贡献度的计算方法,并手动实现了一下过程,文章链接:加法、乘法、除法贡献度计算实现

这篇文章想要把贡献度计算方法封装成一个函数,在传入json输入的时候,自动计算输出结果,其中乘法贡献度的计算方法有做更改,具体如下

一、加载需要的包

import pandas as pd
import numpy as np
import json

二、读取数据(加法,其中A001+A002+A003 = A01)

df_add = pd.read_excel("C:/Users/supaur/Desktop/value.xlsx")
df_add
index_numindex_nametimeperiodvalue
01A012022-08-22day1000
12A0012022-08-22day300
23A0022022-08-22day200
34A0032022-08-22day500
41A012022-08-21day1900
52A0012022-08-21day1000
63A0022022-08-21day500
74A0032022-08-21day400

三、查看传入的加法json并解析出相应的参数

json_file = "C:/Users/supaur/Desktop/add.json"
with open(json_file, "r") as fp:
    data = json.load(fp)
print(data)
date = data["date"]
target1 = data["target1"]
target2 = data["target2"]
target2_1 = data["target2_1"]
target2_2 = data["target2_2"]
type = data["type"]
timetype = data["timetype"]
{'date': ['20220821', '20220822'], 'target1': 'A01', 'target2': ['A001', 'A002', 'A003'], 'target2_1': [], 'target2_2': [], 'type': 'add', 'timetype': 'day'}

四、编写加法贡献度函数

# 生成两个df,分别为参数date里的两个日期,打印出来看看结果
df1_add = df_add[(df_add["time"] == date[0]) & (df_add["period"] == timetype)]
df2_add = df_add[(df_add["time"] == date[1]) & (df_add["period"] == timetype)]
print(df1_add)
print(df2_add)
   index_num index_name       time period  value
4          1        A01 2022-08-21    day   1900
5          2       A001 2022-08-21    day   1000
6          3       A002 2022-08-21    day    500
7          4       A003 2022-08-21    day    400
   index_num index_name       time period  value
0          1        A01 2022-08-22    day   1000
1          2       A001 2022-08-22    day    300
2          3       A002 2022-08-22    day    200
3          4       A003 2022-08-22    day    500
# df1与df2 按照字段名称拼接成下面格式
df3_add = pd.merge(df1_add, df2_add, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
df3_add
index_num_1index_nametime_1period_1value_1index_num_2time_2period_2value_2
01A012022-08-21day190012022-08-22day1000
12A0012022-08-21day100022022-08-22day300
23A0022022-08-21day50032022-08-22day200
34A0032022-08-21day40042022-08-22day500
# 添加一列字段,得到对应字段两个日期的差值
df3_add["D_value"] = df3_add.loc[:, "value_2"] - df3_add.loc[:, "value_1"]
df3_add
index_num_1index_nametime_1period_1value_1index_num_2time_2period_2value_2D_value
01A012022-08-21day190012022-08-22day1000-900
12A0012022-08-21day100022022-08-22day300-700
23A0022022-08-21day50032022-08-22day200-300
34A0032022-08-21day40042022-08-22day500100
# 利用for循环,计算每个target2的贡献度,其中计算方法为target2所在的行,D_value所在列 /target1所在的行,D_value所在列,得到每个target2的贡献度
for i in target2:
    df3_add["%s_contribution" % i] = round(df3_add.loc[df3_add["index_name"] == i, "D_value"].tolist()[0] / df3_add.loc[df3_add["index_name"] == target1, "D_value"].tolist()[0] *100,2)
df3_add    
index_num_1index_nametime_1period_1value_1index_num_2time_2period_2value_2D_valueA001_contributionA002_contributionA003_contribution
01A012022-08-21day190012022-08-22day1000-90077.7833.33-11.11
12A0012022-08-21day100022022-08-22day300-70077.7833.33-11.11
23A0022022-08-21day50032022-08-22day200-30077.7833.33-11.11
34A0032022-08-21day40042022-08-22day50010077.7833.33-11.11
# 把以上代码组合封装成functionadd函数,创建一个空列表,把贡献度值添加到列表里,并调用该函数打印结果如下:
def functionadd(date,target1,target2,timetype):

    df1_add = df_add[(df_add["time"] == date[0]) & (df_add["period"] == timetype)] 
    df2_add = df_add[(df_add["time"] == date[1]) & (df_add["period"] == timetype)]
    df3_add = pd.merge(df1, df2, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
    df3_add["D_value"] = df3_add.loc[:, "value_2"] - df3_add.loc[:, "value_1"]
    re = []
    for i in target2:
        df3_add["%s_contribution" % i] = round(df3_add.loc[df3_add["index_name"] == i, "D_value"].tolist()[0] / df3_add.loc[df3_add["index_name"] == target1, "D_value"].tolist()[0] *100,2)
        re.append(("%s_contribution" % i, df3_add["%s_contribution" % i].tolist()[0]))
    return re
        
print(functionadd(date,target1,target2,timetype))
[('A001_contribution', 29.94), ('A002_contribution', 19.97), ('A003_contribution', 49.99)]

五、乘法跟加法类似,再开始加法之前先对各指标求log,变成加法计算

# 查看传入的乘法json并解析出相应的参数
json_file = "C:/Users/supaur/Desktop/times.json"
with open(json_file, "r") as fp:
    data = json.load(fp)
print(data)
date = data["date"]
target1 = data["target1"]
target2 = data["target2"]
target2_1 = data["target2_1"]
target2_2 = data["target2_2"]
type = data["type"]
timetype = data["timetype"]
{'date': ['20220801', '20220901'], 'target1': 'B01', 'target2': ['B001', 'B002'], 'target2_1': [], 'target2_2': [], 'type': 'times', 'timetype': 'month'}
# 查看乘法数据,其中A001*A002*A003 = A01 , B001*B002 = B01
df_times = pd.read_excel("C:/Users/supaur/Desktop/times.xlsx") 
df_times
index_numindex_nametimeperiodvalue
01A012022-08-22day30000.00
12A0012022-08-22day0.30
23A0022022-08-22day200.00
34A0032022-08-22day500.00
41A012022-08-21day20000.00
52A0012022-08-21day0.10
63A0022022-08-21day500.00
74A0032022-08-21day400.00
81A012022-08-20day32000.00
92A0012022-08-20day0.20
103A0022022-08-20day400.00
114A0032022-08-20day400.00
125B012022-08-01month150000.00
136B0012022-08-01month0.30
147B0022022-08-01month500000.00
155B012022-09-01month107500.00
166B0012022-09-01month0.25
177B0022022-09-01month430000.00
185B012022-10-01month185500.00
196B0012022-10-01month0.35
207B0022022-10-01month530000.00
# 编写乘法函数,加法是直接求两日期差值,乘法是先取对数log,再变成加法计算
def functiontimes(date,target1,target2,timetype):

    df1_times = df_times[(df_times["time"] == date[0]) & (df_times["period"] == timetype)]
    df2_times = df_times[(df_times["time"] == date[1]) & (df_times["period"] == timetype)]
    df3_times = pd.merge(df1_times, df2_times, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
    df3_times["D_value"] = np.log10(df3_times.loc[:, "value_2"]) - np.log10(df3_times.loc[:, "value_1"])
    re = []
    for i in target2:
        df3_times["%s_contribution" % i] = round(df3_times.loc[df3_times["index_name"] == i, "D_value"].tolist()[0] / df3_times.loc[df3_times["index_name"] == target1, "D_value"].tolist()[0] *100,2)
        re.append(("%s_contribution" % i, df3_times["%s_contribution" % i].tolist()[0]))
    return re


print(functiontimes(date,target1,target2,timetype))
[('B001_contribution', 54.73), ('B002_contribution', 45.27)]

六、除法贡献度函数

# 查看传入的除法json并解析出相应的参数,除法要比加法和乘法多两个参数target2_1,target2_2
json_file = "C:/Users/supaur/Desktop/div.json"
with open(json_file, "r") as fp:
    data = json.load(fp)
print(data)
date = data["date"]
target1 = data["target1"]
target2 = data["target2"]
target2_1 = data["target2_1"]
target2_2 = data["target2_2"]
type = data["type"]
timetype = data["timetype"]
{'date': ['20220822', '20220823'], 'target1': 'A01', 'target2': ['A001', 'A002', 'A003'], 'target2_1': ['A001_1', 'A002_1', 'A003_1'], 'target2_2': ['A001_2', 'A002_2', 'A003_2'], 'type': 'div', 'timetype': 'day'}
# 查看除法数据,其中A001 = A001_1/A001_2 , A002 = A002_1/A002_2 , 以此类推,A01 = (A001_1 + A002_1 + A003_1) / (A001_2 + A002_2 + A003_2)
df_div = pd.read_excel("C:/Users/supaur/Desktop/div.xlsx")
df_div
index_numindex_nametimeperiodvalue
01A012022-08-22day0.840546
12A0012022-08-22day0.882581
22A001_12022-08-22day684.000000
32A001_22022-08-22day775.000000
43A0022022-08-22day0.732308
53A002_12022-08-22day714.000000
63A002_22022-08-22day975.000000
74A0032022-08-22day0.930061
84A003_12022-08-22day758.000000
94A003_22022-08-22day815.000000
101A012022-08-23day0.448011
112A0012022-08-23day0.696589
122A001_12022-08-23day388.000000
132A001_22022-08-23day557.000000
143A0022022-08-23day0.415882
153A002_12022-08-23day309.000000
163A002_22022-08-23day743.000000
174A0032022-08-23day0.322368
184A003_12022-08-23day294.000000
194A003_22022-08-23day912.000000
# 与加法和乘法一样 生成两个df,分别为参数date里的两个日期
df1_div = df_div[(df_div["time"] == date[0]) & (df_div["period"] == timetype)]
df2_div = df_div[(df_div["time"] == date[1]) & (df_div["period"] == timetype)]
print(df1_div)
print(df2_div)
   index_num index_name       time period       value
0          1        A01 2022-08-22    day    0.840546
1          2       A001 2022-08-22    day    0.882581
2          2     A001_1 2022-08-22    day  684.000000
3          2     A001_2 2022-08-22    day  775.000000
4          3       A002 2022-08-22    day    0.732308
5          3     A002_1 2022-08-22    day  714.000000
6          3     A002_2 2022-08-22    day  975.000000
7          4       A003 2022-08-22    day    0.930061
8          4     A003_1 2022-08-22    day  758.000000
9          4     A003_2 2022-08-22    day  815.000000
    index_num index_name       time period       value
10          1        A01 2022-08-23    day    0.448011
11          2       A001 2022-08-23    day    0.696589
12          2     A001_1 2022-08-23    day  388.000000
13          2     A001_2 2022-08-23    day  557.000000
14          3       A002 2022-08-23    day    0.415882
15          3     A002_1 2022-08-23    day  309.000000
16          3     A002_2 2022-08-23    day  743.000000
17          4       A003 2022-08-23    day    0.322368
18          4     A003_1 2022-08-23    day  294.000000
19          4     A003_2 2022-08-23    day  912.000000
# 求所有target2_1 的和,新增列为sum_1
sum_1 = 0
for i in target2_1:
    sum_1 = sum_1 + df1_div.loc[df1_div["index_name"] == i , "value"].tolist()[0]
df1_div["sum_1"] = sum_1
df1_div
index_numindex_nametimeperiodvaluesum_1
01A012022-08-22day0.8405462156.0
12A0012022-08-22day0.8825812156.0
22A001_12022-08-22day684.0000002156.0
32A001_22022-08-22day775.0000002156.0
43A0022022-08-22day0.7323082156.0
53A002_12022-08-22day714.0000002156.0
63A002_22022-08-22day975.0000002156.0
74A0032022-08-22day0.9300612156.0
84A003_12022-08-22day758.0000002156.0
94A003_22022-08-22day815.0000002156.0
# 求所有target2_2 的和,新增列为sum_2
sum_2 = 0
for i in target2_2:
    sum_2 = sum_2 + df1_div.loc[df1_div["index_name"] == i , "value"].tolist()[0]
df1_div["sum_2"] = sum_2
df1_div
index_numindex_nametimeperiodvaluesum_1sum_2
01A012022-08-22day0.8405462156.02565.0
12A0012022-08-22day0.8825812156.02565.0
22A001_12022-08-22day684.0000002156.02565.0
32A001_22022-08-22day775.0000002156.02565.0
43A0022022-08-22day0.7323082156.02565.0
53A002_12022-08-22day714.0000002156.02565.0
63A002_22022-08-22day975.0000002156.02565.0
74A0032022-08-22day0.9300612156.02565.0
84A003_12022-08-22day758.0000002156.02565.0
94A003_22022-08-22day815.0000002156.02565.0
# df1_div, df2_div 按照字段名称拼接成下面格式
df3_div = pd.merge(df1_div, df2_div, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
df3_div
index_num_1index_nametime_1period_1value_1sum_1sum_2index_num_2time_2period_2value_2
01A012022-08-22day0.8405462156.02565.012022-08-23day0.448011
12A0012022-08-22day0.8825812156.02565.022022-08-23day0.696589
22A001_12022-08-22day684.0000002156.02565.022022-08-23day388.000000
32A001_22022-08-22day775.0000002156.02565.022022-08-23day557.000000
43A0022022-08-22day0.7323082156.02565.032022-08-23day0.415882
53A002_12022-08-22day714.0000002156.02565.032022-08-23day309.000000
63A002_22022-08-22day975.0000002156.02565.032022-08-23day743.000000
74A0032022-08-22day0.9300612156.02565.042022-08-23day0.322368
84A003_12022-08-22day758.0000002156.02565.042022-08-23day294.000000
94A003_22022-08-22day815.0000002156.02565.042022-08-23day912.000000
# 添加一列字段,得到对应字段两个日期的差值
df3_div["D_value"] = df3_div.loc[:, "value_2"] - df3_div.loc[:, "value_1"]
df3_div
index_num_1index_nametime_1period_1value_1sum_1sum_2index_num_2time_2period_2value_2D_value
01A012022-08-22day0.8405462156.02565.012022-08-23day0.448011-0.392535
12A0012022-08-22day0.8825812156.02565.022022-08-23day0.696589-0.185992
22A001_12022-08-22day684.0000002156.02565.022022-08-23day388.000000-296.000000
32A001_22022-08-22day775.0000002156.02565.022022-08-23day557.000000-218.000000
43A0022022-08-22day0.7323082156.02565.032022-08-23day0.415882-0.316426
53A002_12022-08-22day714.0000002156.02565.032022-08-23day309.000000-405.000000
63A002_22022-08-22day975.0000002156.02565.032022-08-23day743.000000-232.000000
74A0032022-08-22day0.9300612156.02565.042022-08-23day0.322368-0.607693
84A003_12022-08-22day758.0000002156.02565.042022-08-23day294.000000-464.000000
94A003_22022-08-22day815.0000002156.02565.042022-08-23day912.00000097.000000
# 求每个target2 的贡献度,计算方法为每个 target2_1 的 D_value + sum_1 的和 / 每个 target2_2 的 D_value + sum_2 的和  - target1 的 value_1
# 再对target2 的贡献度求和
sum = 0
for (a ,i ,j) in zip(target2,target2_1,target2_2):
    df3_div["%s_contribution" % a] = (df3_div.loc[df3_div["index_name"] == i, "sum_1"].tolist()[0] + df3_div.loc[df3_div["index_name"] == i,"D_value"].tolist()[0]) / (df3_div.loc[df3_div["index_name"] == j, "sum_2"].tolist()[0] + df3_div.loc[df3_div["index_name"] == j,"D_value"].tolist()[0]) - df3_div.loc[df3_div["index_name"] == target1, "value_1"].tolist()[0]
    sum = sum + df3_div["%s_contribution" % a]
df3_div["sum"] = sum
df3_div
index_num_1index_nametime_1period_1value_1sum_1sum_2index_num_2time_2period_2value_2D_valueA001_contributionA002_contributionA003_contributionsum
01A012022-08-22day0.8405462156.02565.012022-08-23day0.448011-0.392535-0.048045-0.09001-0.204933-0.342988
12A0012022-08-22day0.8825812156.02565.022022-08-23day0.696589-0.185992-0.048045-0.09001-0.204933-0.342988
22A001_12022-08-22day684.0000002156.02565.022022-08-23day388.000000-296.000000-0.048045-0.09001-0.204933-0.342988
32A001_22022-08-22day775.0000002156.02565.022022-08-23day557.000000-218.000000-0.048045-0.09001-0.204933-0.342988
43A0022022-08-22day0.7323082156.02565.032022-08-23day0.415882-0.316426-0.048045-0.09001-0.204933-0.342988
53A002_12022-08-22day714.0000002156.02565.032022-08-23day309.000000-405.000000-0.048045-0.09001-0.204933-0.342988
63A002_22022-08-22day975.0000002156.02565.032022-08-23day743.000000-232.000000-0.048045-0.09001-0.204933-0.342988
74A0032022-08-22day0.9300612156.02565.042022-08-23day0.322368-0.607693-0.048045-0.09001-0.204933-0.342988
84A003_12022-08-22day758.0000002156.02565.042022-08-23day294.000000-464.000000-0.048045-0.09001-0.204933-0.342988
94A003_22022-08-22day815.0000002156.02565.042022-08-23day912.00000097.000000-0.048045-0.09001-0.204933-0.342988
# 标准化,每个target2贡献度值/ 所有target2 总和 得到每个 target2 标准化后的贡献度
for b in target2:
    df3_div["%s_nor_contribution" % b] = round(df3_div.loc[df3_div["index_name"] == b, "%s_contribution" % b].tolist()[0] / df3_div.loc[df3_div["index_name"] == b, "sum"].tolist()[0] * 100, 2)
df3_div    
index_num_1index_nametime_1period_1value_1sum_1sum_2index_num_2time_2period_2value_2D_valueA001_contributionA002_contributionA003_contributionsumA001_nor_contributionA002_nor_contributionA003_nor_contribution
01A012022-08-22day0.8405462156.02565.012022-08-23day0.448011-0.392535-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
12A0012022-08-22day0.8825812156.02565.022022-08-23day0.696589-0.185992-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
22A001_12022-08-22day684.0000002156.02565.022022-08-23day388.000000-296.000000-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
32A001_22022-08-22day775.0000002156.02565.022022-08-23day557.000000-218.000000-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
43A0022022-08-22day0.7323082156.02565.032022-08-23day0.415882-0.316426-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
53A002_12022-08-22day714.0000002156.02565.032022-08-23day309.000000-405.000000-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
63A002_22022-08-22day975.0000002156.02565.032022-08-23day743.000000-232.000000-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
74A0032022-08-22day0.9300612156.02565.042022-08-23day0.322368-0.607693-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
84A003_12022-08-22day758.0000002156.02565.042022-08-23day294.000000-464.000000-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
94A003_22022-08-22day815.0000002156.02565.042022-08-23day912.00000097.000000-0.048045-0.09001-0.204933-0.34298814.0126.2459.75
# 对上述代码封装成除法贡献度函数
def functiondiv(date,target1,target2,target2_1,target2_2,timetype):

    df1_div = df_div[(df_div["time"] == date[0]) & (df_div["period"] == timetype)]
    df2_div = df_div[(df_div["time"] == date[1]) & (df_div["period"] == timetype)]
    sum_1 = 0
    for i in target2_1:
        sum_1 = sum_1 + df1_div.loc[df1_div["index_name"] == i , "value"].tolist()[0]
    df1_div["sum_1"] = sum_1
    sum_2 = 0
    for i in target2_2:
        sum_2 = sum_2 + df1_div.loc[df1_div["index_name"] == i , "value"].tolist()[0]
    df1_div["sum_2"] = sum_2
    df3_div = pd.merge(df1_div, df2_div, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
    df4_div = df3_div.loc[:, ["index_num_1", "index_name", "time_1", "time_2", "period_1", "value_1", "value_2","sum_1","sum_2"]]
    df4_div["D_value"] = df4_div.loc[:, "value_2"] - df4_div.loc[:, "value_1"]
    sum = 0
    for (a ,i ,j) in zip(target2,target2_1,target2_2):
        df4_div["%s_contribution" % a] = (df4_div.loc[df4_div["index_name"] == i, "sum_1"].tolist()[0] + df4_div.loc[df4_div["index_name"] == i,"D_value"].tolist()[0]) / (df4_div.loc[df4_div["index_name"] == j, "sum_2"].tolist()[0] + df4_div.loc[df4_div["index_name"] == j,"D_value"].tolist()[0]) - df4_div.loc[df4_div["index_name"] == target1, "value_1"].tolist()[0]
        sum = sum + df4_div["%s_contribution" % a]
    df4_div["sum"] = sum
    re = []
    for b in target2:
        df4_div["%s_nor_contribution" % b] = round(df4_div.loc[df4_div["index_name"] == b, "%s_contribution" % b].tolist()[0] / df4_div.loc[df4_div["index_name"] == b, "sum"].tolist()[0] * 100, 2)
        re.append(("%s_nor_contribution" % b, df4_div["%s_nor_contribution" % b].tolist()[0]))
    return re

print(functiondiv(date, target1, target2,target2_1,target2_2, timetype))
[('A001_nor_contribution', 14.01), ('A002_nor_contribution', 26.24), ('A003_nor_contribution', 59.75)]
  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值