2021高教社杯全国大学生数学建模竞赛C题 问题一&问题二 Python代码

import pandas as pd
import warnings
warnings.filterwarnings("ignore")

path = '/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/'
d1 = pd.read_excel((path + '附件1 近5年402家供应商的相关数据.xlsx'), sheet_name='企业的订货量(m³)')
d2 = pd.read_excel((path + '附件1 近5年402家供应商的相关数据.xlsx'), sheet_name='供应商的供货量(m³)')
d3 = pd.read_excel((path + '附件2 近5年8家转运商的相关数据.xlsx'))

问题一

1.1 根据附件 1,对 402 家供应商的供货特征进行量化分析

https://dxs.moe.gov.cn/zx/a/hd_sxjm_sxjmlw_2021qgdxssxjmjslwzs/211025/1734085.shtml
在这里插入图片描述

计算供货特征

供货次数(正向)

d2_sub = d2.iloc[:,2:]
supply_times = d2_sub.apply(lambda x: sum(x!=0), axis=1)

平均供货量(正向)

supply_quantity_mean = d2_sub.apply(lambda x: sum(x), axis=1) / supply_times

单次最大供货量(正向)

supply_max = d2_sub.apply(lambda x: max(x), axis=1)

供货稳定性(负向)

d1_sub = d1.iloc[:,2:]
d12_sub = d1_sub.subtract(d2_sub) ** 2
supply_stability = d12_sub.apply(lambda x: sum(x), axis=1) / supply_times

供货连续性

  • 间隔次数(负向)
import numpy as np

gap_times = [None] * d2_sub.shape[0]
for i in range(0,d2_sub.shape[0]):
    a = d2_sub.iloc[i,:] == 0
    gap_times[i] = (a&~np.r_[[False],a[:-1]]).sum() 
  • 平均间隔周数(负向)
gap_weeks_mean = [None] * d2_sub.shape[0]
for i in range(0,d2_sub.shape[0]):
    index = [0] + list(np.where(d2_sub.iloc[i,:] != 0)[0]) + [241]
    new = np.diff(index)
    gap_weeks_mean[i] = sum(new[np.where((new != 1) & (new != 0))])

gap_weeks_mean = gap_weeks_mean / supply_times
  • 平均连续供货周数(正向)
supply_weeks_mean = [None] * d2_sub.shape[0]
for i in range(0,d2_sub.shape[0]):
    index = np.where(d2_sub.iloc[i,:] != 0)[0]
    new = np.where(np.diff(index) == 1)[0]
    supply_weeks_mean[i] = len(new) * 2 - len(np.where(np.diff(new) == 1)[0])
    
supply_weeks_mean = supply_weeks_mean / supply_times

合理供货比例(正向)

df = pd.DataFrame(None, columns=list(d2_sub.columns),
                  index=list(d2_sub.index))

for i in range(0,d2_sub.shape[0]):
    for j in range(0,d2_sub.shape[1]):
        if d1_sub.iloc[i,j] == 0:
            df.iloc[i,j] = 0
        if (d2_sub.iloc[i,j] > d1_sub.iloc[i,j] * 0.8) and (d2_sub.iloc[i,j] < d1_sub.iloc[i,j] * 1.2):
            df.iloc[i,j] = True
        else:
            df.iloc[i,j] = False
            
supply_proportion = df.apply(lambda x: sum(x), axis=1) / supply_times
数据标准化
df = pd.DataFrame(
    {'供货次数': supply_times,
     '平均供货量': supply_quantity_mean,
     '单次最大供货量': supply_max,
     '供货稳定性': supply_stability,
     '间隔次数': gap_times,
     '平均间隔周数': gap_weeks_mean,
     '平均连续供货周数': supply_weeks_mean,
     '合理供货比例': supply_proportion
    })

df.shape
(402, 8)
对正向指标归一化
df_positive = df[['供货次数','平均供货量','单次最大供货量','平均连续供货周数','合理供货比例']]
df_positive_norm = df_positive.apply(lambda x: (x-min(x)) / (max(x)-min(x)), axis=0)
df_positive_norm.head()
供货次数平均供货量单次最大供货量平均连续供货周数合理供货比例
00.1004180.0003280.0001350.3600000.360000
10.2928870.0009720.0017850.5774650.507042
20.7949790.0231570.0104410.9738220.727749
30.1338910.0003210.0001890.6363640.363636
40.4435150.0217270.0034351.0000000.859813
对负向指标归一化
df_negative = df[['供货稳定性','间隔次数','平均间隔周数']]
df_negative_norm = df_negative.apply(lambda x: (max(x)-x) / (max(x)-min(x)), axis=0)
df_negative_norm.head()
供货稳定性间隔次数平均间隔周数
00.9999990.8090910.960863
11.0000000.5909090.987411
20.9999880.8636360.998601
30.9999930.8090910.971365
41.0000000.9454550.994567
# merge data
df_norm = pd.concat([df_positive_norm, df_negative_norm], axis=1, join='inner')

1.2 建立反映保障企业生产重要性的数学模型

熵权法

https://www.kaggle.com/code/alpayabbaszade/entropy-topsis

首先对供货连续性下的3个二级指标进行加权

supply_continuity = df_norm[['间隔次数','平均间隔周数','平均连续供货周数']]
#Normalize Decision matrix
def norm(X):
    return X/X.sum()

supply_continuity_norm = norm(supply_continuity)
#Entropy Values
k = -(1/np.log(supply_continuity_norm.shape[0]))

def entropy(X):
    return (X*np.log(X)).sum()*k

entropy = entropy(supply_continuity_norm)

#degree of differentiation
dod = 1 - entropy
w = dod/dod.sum()
weights = w.sort_values(ascending = False)
weights
平均连续供货周数    0.594747
间隔次数        0.384353
平均间隔周数      0.020900
dtype: float64
supply_continuity_weighted = supply_continuity['间隔次数']*weights.iloc[0] + supply_continuity['平均间隔周数']*weights.iloc[1] + supply_continuity['平均连续供货周数']*weights.iloc[2]
df_norm.drop(['间隔次数','平均间隔周数','平均连续供货周数'], axis=1, inplace=True)
df_norm['供货连续性'] = supply_continuity_weighted
df_norm.head(3)
供货次数平均供货量单次最大供货量合理供货比例供货稳定性供货连续性
00.1004180.0003280.0001350.3600000.9999990.858039
10.2928870.0009720.0017850.5070421.0000000.743025
20.7949790.0231570.0104410.7277490.9999880.917813

对6个一级指标进行加权

#Normalize Decision matrix
def norm(X):
    return X/X.sum()

df_norm_new = norm(df_norm)

#Entropy Values
k = -(1/np.log(df_norm_new.shape[0]))

def entropy(X):
    return (X*np.log(X)).sum()*k

entropy = entropy(df_norm_new)

#degree of differentiation
dod = 1 - entropy
w = dod/dod.sum()
weights_entropy = w.sort_values(ascending = False)
weights_entropy
单次最大供货量    0.496440
平均供货量      0.392450
供货次数       0.091647
合理供货比例     0.016205
供货连续性      0.002825
供货稳定性      0.000433
dtype: float64
熵权法-TOPSIS
def norm(X):
    return X/np.sqrt((X**2).sum())

norm_matrix = norm(df_norm)
w_norm_matrix = norm_matrix*w

V_plus = w_norm_matrix.apply(max)
V_minus = w_norm_matrix.apply(min)

S_plus = np.sqrt(((w_norm_matrix - V_plus)**2).apply(sum, axis = 1))
S_minus = np.sqrt(((w_norm_matrix - V_minus)**2).apply(sum, axis = 1))
scores = S_minus/(S_plus + S_minus)
d2['综合得分'] = scores * 100
output = d2[['供应商ID','综合得分']]

# sort by scores
output = output.sort_values('综合得分', ascending=False)
output.iloc[0:50,:].to_csv('/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/output/scores_top50.csv')
output.iloc[0:50,:].head()
供应商ID综合得分
200S20187.972335
347S34857.756055
139S14052.993102
150S15144.951646
373S37441.858722
output.to_csv('/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/output/scores_all.csv')
AHP
df_norm.head()
供货次数平均供货量单次最大供货量合理供货比例供货稳定性供货连续性
00.1004180.0003280.0001350.3600000.9999990.858039
10.2928870.0009720.0017850.5070421.0000000.743025
20.7949790.0231570.0104410.7277490.9999880.917813
30.1338910.0003210.0001890.3636360.9999930.867852
40.4435150.0217270.0034350.8598131.0000000.965471
import ahpy
comparisons = {('供货次数', '平均供货量'): 3, ('供货次数', '单次最大供货量'): 5, ('供货次数', '合理供货比例'): 5, ('供货次数', '供货稳定性'): 5, ('供货次数', '供货连续性'): 5, 
                     ('平均供货量', '单次最大供货量'): 5, ('平均供货量', '合理供货比例'): 3, ('平均供货量', '供货稳定性'): 3, ('平均供货量', '供货连续性'): 3,
                     ('单次最大供货量', '合理供货比例'): 1/3, ('单次最大供货量', '供货稳定性'): 1/3, ('单次最大供货量', '供货连续性'): 1/3,
                     ('合理供货比例', '供货稳定性'): 1, ('合理供货比例', '供货连续性'): 1,
                     ('供货稳定性', '供货连续性'): 1}
comparisons
{('供货次数', '平均供货量'): 3,
 ('供货次数', '单次最大供货量'): 5,
 ('供货次数', '合理供货比例'): 5,
 ('供货次数', '供货稳定性'): 5,
 ('供货次数', '供货连续性'): 5,
 ('平均供货量', '单次最大供货量'): 5,
 ('平均供货量', '合理供货比例'): 3,
 ('平均供货量', '供货稳定性'): 3,
 ('平均供货量', '供货连续性'): 3,
 ('单次最大供货量', '合理供货比例'): 0.3333333333333333,
 ('单次最大供货量', '供货稳定性'): 0.3333333333333333,
 ('单次最大供货量', '供货连续性'): 0.3333333333333333,
 ('合理供货比例', '供货稳定性'): 1,
 ('合理供货比例', '供货连续性'): 1,
 ('供货稳定性', '供货连续性'): 1}
cal = ahpy.Compare(name='Drinks', comparisons=comparisons, precision=3, random_index='saaty')
cal.target_weights
{'供货次数': 0.445,
 '平均供货量': 0.232,
 '合理供货比例': 0.093,
 '供货稳定性': 0.093,
 '供货连续性': 0.093,
 '单次最大供货量': 0.044}
cal.consistency_ratio
0.032

CR < 0.1, 可认为判断矩阵的一致性可以接受

1.3 在此基础上确定 50 家最重要的供应商,并在论文中列表给出结果。

将熵权法和AHP得到的权重进行平均,得到最终的指标权重,然后加权计算各供应商的得分

weights_ahp = pd.DataFrame.from_dict(cal.target_weights, orient='index',
                       columns=['AHP权重'])
import statistics

results = pd.concat([weights_ahp, weights_entropy], axis=1)
results.columns = ['AHP权重','熵权法权重']
results['最终权重'] = results.apply(lambda x: statistics.mean(x), axis=1)
results
AHP权重熵权法权重最终权重
供货次数0.4450.0916470.268324
平均供货量0.2320.3924500.312225
合理供货比例0.0930.0162050.054603
供货稳定性0.0930.0004330.046716
供货连续性0.0930.0028250.047912
单次最大供货量0.0440.4964400.270220
d2['综合得分2'] = (df_norm['供货次数']*0.268324 + df_norm['平均供货量']*0.312225 + df_norm['合理供货比例']*0.054603 + df_norm['供货稳定性']*0.046716 + df_norm['供货连续性']*0.047912 + df_norm['单次最大供货量']*0.270220)*300
output = d2[['供应商ID','综合得分2']]

# sort by scores
output = output.sort_values('综合得分2', ascending=False)
output.iloc[0:50,:].to_csv('/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/output/scores_top50_AHP&Entropy.csv')
# 对排名前10的供应商可视化
df = output.iloc[0:10,:]
df = df.sort_values(by='综合得分2')
df
供应商ID综合得分2
307S308160.745089
329S330164.460643
107S108174.260128
360S361174.874852
373S374175.082953
228S229179.242563
139S140196.710758
150S151197.078972
200S201199.599719
347S348200.332317
# Horizontal lollipop plot
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Linux show Chinese characters *** important
plt.rcParams['font.family'] = 'WenQuanYi Micro Hei' 
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300

my_range=range(1,11)
plt.hlines(y=my_range, xmin=0, xmax=df['综合得分2'], color='skyblue')
plt.plot(df['综合得分2'], my_range, "o")
 
# Add titles and axis names
plt.yticks(my_range, df['供应商ID'])
plt.title("综合得分前10的供应商")
plt.xlabel('供应商综合得分')
plt.ylabel('供应商ID')

# Show the plot
plt.show()

在这里插入图片描述

问题二

Pyhon中智能算法的模块:

https://scikit-opt.github.io/scikit-opt/#/en/README

Python中优化的模块

https://docs.scipy.org/doc/scipy/tutorial/optimize.html

formular editor:

https://editor.codecogs.com/

2.1 参考问题 1,该企业应至少选择多少家供应商供应原材料才可能满足生产的需求?

在这里插入图片描述
在这里插入图片描述

平均损失率
α \alpha α

loss_rate = sum(d3.iloc[:,1:].apply(lambda x: sum(x) / sum(x!=0), axis=0)) / 240
loss_rate
1.3063532832341274

供货量 x ^ i , t \widehat{x}_{i,t} x i,t

supply_quantity_mean[:6]
0     1.960000
1     3.845070
2    68.785340
3     1.939394
4    64.598131
5     2.307692
dtype: float64

供应商得分 s i s_{i} si

scores = pd.read_csv('/home/shiyu/Desktop/path_acdemic/ant/数模/历年题目/2021/output/scores_all.csv')
scores = scores.iloc[:,1:]
index_A = np.where(d2['材料分类'] == 'A')[0]
index_B = np.where(d2['材料分类'] == 'B')[0]
index_C = np.where(d2['材料分类'] == 'C')[0]
import numpy as np

def func(y):
    # input y is a list with 402 dims
    # set y as int
    y = np.around(np.array(y))
    res = sum(y) / sum(y * scores['综合得分'])
    return res
constraint_eq = [
    lambda y: sum((np.around(np.array(y))[index_A] 
                   * supply_quantity_mean[index_A] 
                   * (1-loss_rate/100)) / 0.6) 
    + sum((np.around(np.array(y))[index_B] 
                    * supply_quantity_mean[index_B] 
                    * (1-loss_rate/100)) / 0.66) 
    + sum((np.around(np.array(y))[index_C] 
           * supply_quantity_mean[index_C] 
           * (1-loss_rate/100)) / 0.72) - 2.82 * 10**4
]
遗传算法
from sko.GA import GA
y_len = 402

ga = GA(func=func, n_dim=y_len, size_pop=50, 
        max_iter=800, prob_mut=0.001, 
        lb=[0]*y_len, ub=[1]*y_len, precision=1,
        constraint_eq=constraint_eq)
best_y_ga = ga.run()
suppliers_index_ga = np.where(best_y_ga[0] == 1)[0].tolist()
suppliers_ga = d2.iloc[suppliers_index_ga,0:2]
print('选择的供应商数量:', suppliers_ga.shape[0])
选择的供应商数量: 210
差异进化
from sko.DE import DE
y_len = 402
de = DE(func=func, n_dim=y_len, size_pop=50, max_iter=800, 
        lb=[0]*y_len, ub=[1]*y_len,
        constraint_eq=constraint_eq)

best_y_de = de.run()
y_de = np.around(best_y_de[0])
suppliers_index_de = np.where(y_de == 1)[0].tolist()
suppliers_de = d2.iloc[suppliers_index_de,0:2]
print('选择的供应商数量:', suppliers_de.shape[0])
选择的供应商数量: 184
粒子群优化(PSO)
from sko.PSO import PSO
y_len = 402
pso = PSO(func=func, n_dim=y_len, pop=40, max_iter=150, 
          lb=[0]*y_len, ub=[1]*y_len, 
          constraint_eq=constraint_eq)
best_y_pso = pso.run()
y_pso = np.around(best_y_pso[0])
suppliers_index_pso = np.where(y_pso == 1)[0].tolist()
suppliers_pso = d2.iloc[suppliers_index_pso,0:2]
print('选择的供应商数量:', suppliers_pso.shape[0])
选择的供应商数量: 152
import matplotlib.pyplot as plt
# Linux show Chinese characters *** important
plt.rcParams['font.family'] = 'WenQuanYi Micro Hei' 
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300

plt.plot(pso.gbest_y_hist)
plt.title("目标函数的迭代过程")
plt.xlabel('迭代次数')
plt.ylabel('目标函数值')
plt.show()

在这里插入图片描述

相关阅读

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值