2021 年高教社杯全国大学生数学建模竞赛 E 题 中药材的鉴别 第二题

问题 2. 根据附件 2 中某一种药材的中红外光谱数据,分析不同产地药材的特征和差异性,试鉴别药材的产地,并将下表中所给出编号的药材产地的鉴别结果填入表格中。

不同产地中药材差异分析

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = 'SimHei' # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

data = pd.read_excel('附件2.xlsx', index_col = 0)
data
df_1 = pd.DataFrame(np.mean(data[data['OP']==1.0].drop('OP', axis=1)))
df_2 = pd.DataFrame(np.mean(data[data['OP']==2.0].drop('OP', axis=1)))
df_3 = pd.DataFrame(np.mean(data[data['OP']==3.0].drop('OP', axis=1)))
df_4 = pd.DataFrame(np.mean(data[data['OP']==4.0].drop('OP', axis=1)))
df_5 = pd.DataFrame(np.mean(data[data['OP']==5.0].drop('OP', axis=1)))
df_6 = pd.DataFrame(np.mean(data[data['OP']==6.0].drop('OP', axis=1)))
df_7 = pd.DataFrame(np.mean(data[data['OP']==7.0].drop('OP', axis=1)))
df_8 = pd.DataFrame(np.mean(data[data['OP']==8.0].drop('OP', axis=1)))
df_9 = pd.DataFrame(np.mean(data[data['OP']==9.0].drop('OP', axis=1)))
df_10 = pd.DataFrame(np.mean(data[data['OP']==10.0].drop('OP', axis=1)))
df_11 = pd.DataFrame(np.mean(data[data['OP']==11.0].drop('OP', axis=1)))
index_0 = range(551, 3999)

plt.figure(figsize=(8, 6), dpi = 300)
plt.xticks(range(551, 3999, 500))
plt.plot(df_1, c = 'r', label = '产地一')
plt.plot(df_2, c = 'b', label = '产地二')
plt.plot(df_3, c = 'k', label = '产地三')
plt.plot(df_4, c = 'y', label = '产地四')
plt.plot(df_5, c = 'g', label = '产地五')
plt.plot(df_6, c = 'm', label = '产地六')
plt.plot(df_7, c = 'c', label = '产地七')
plt.plot(df_8, c = 'tan', label = '产地八')
plt.plot(df_9, c = 'orange', label = '产地九')
plt.plot(df_10, c = 'sienna', label = '产地十')
plt.plot(df_11, c = 'peru', label = '产地十一')
plt.grid(True)
plt.legend(loc='upper left', numpoints=2, ncol=1, fontsize=8, bbox_to_anchor=(1.04, 1))
plt.xlabel('波数(cm^-1)')
plt.ylabel('吸光度(AU)')
plt.title('不同产地药材光谱数据均值曲线')
plt.savefig('不同产地药材光谱数据均值曲线.png')
plt.show()

def SID(x, y):
    p = np.zeros_like(x, dtype=np.float)
    q = np.zeros_like(y, dtype=np.float)
    Sid = 0
    for i in range(len(x)):
        p[i] = x[i]/np.sum(x)
        q[i] = y[i]/np.sum(y)
    for j in range(len(x)):
        Sid += np.sum(p[j]*np.log10(p[j]/q[j]))+np.sum(q[j]*np.log10(q[j]/p[j]))
    return Sid
# 一/二
SID(df_1.values.T, df_2.values.T)
#一/三
SID(df_1.values.T, df_3.values.T)
#一/四
SID(df_1.values.T, df_4.values.T)

...

欧氏距离、支持向量机分类

import numpy as np
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = 'SimHei' # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

data = pd.read_excel('附件2.xlsx', index_col = 0)
data.shape
data.describe([0.01, 0.25, 0.75, 0.99]).T

data.info()

data.iloc[:,1].isnull().any().any()

data.iloc[:,0].value_counts().sort_index() # 统计每个产地药材数量并排序

data_X = data.iloc[:, 1:]
data_X = pd.DataFrame(data_X.iloc[:,1:].values-data_X.iloc[:,:-1].values, index = data.index)
data_Y = data.iloc[:,0]
data_Y.fillna(0, inplace = True) #用0填充op空值 未知产地药材
data_Y = data_Y.astype('int')
qsz_index = data_Y[data_Y==0].index #保留缺失值索引
def func_1(x):
    plt.plot(x.index, x.values)
def func_2(data):
    fontsize = 5
    plt.figure(figsize=(8, 6), dpi = 300)
    plt.xticks(range(551, 3999, 500), rotation = 45, fontsize = 5)
    plt.yticks(fontsize = fontsize)
    plt.xlabel('波数(cm^-1)')
    plt.ylabel('吸光度(AU)')
    plt.grid(True) # 网格线设置
    data.agg(lambda x: func_1(x), axis = 1)
    plt.show()
func_2(data.iloc[:,1:]) #附件二光谱曲线图

func_2(data_X) #附件二一阶平滑后数据曲线图

max_min = data_X.agg(lambda x:x.max()-x.min()) #计算极差
data1 = data_X.loc[:,max_min>max_min.mean()+0*max_min.std()] # 提取特征区间
data_X_know = data1.drop(qsz_index)
data_X_unknow = data1.loc[qsz_index,:]
data_Y_know = data_Y.drop(qsz_index)
test = pd.concat([data_Y_know, data_X_know], axis = 1)

欧式距离

from scipy.spatial import distance_matrix

dis = pd.DataFrame(distance_matrix(data_X_know.values, data_X_unknow.values), index = data_X_know.index, columns = data_X_unknow.index)

dis_group = pd.concat([data_Y_know, dis], axis = 1)

min_index = dis_group.iloc[:,1:].idxmin()
min_index #返回欧式距离最小的索引

[*zip(qsz_index, dis_group.loc[min_index,'OP'])] #返回待分类药材产地

支持向量机

from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from hyperopt import fmin, tpe, hp, rand
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn import datasets

print(data_X_know.shape, data_X_unknow.shape, data_Y_know.shape)

parameter_space_svc = {
    'C':hp.loguniform(np.log(100),np.log(1000)),
    'kernel':hp.choice('kernel',['rbf','poly']),
    'gamma':hp.loguniform('gamma',np.log(100),np.log(1000)),
}

Xtrain,Xtest,Ytrain,Ytest = train_test_split(data_X_know, data_Y_know, test_size = 0.3) 
#分离训练集和测试集


count = 0

def func(args):
    print(args)
    
    clf = svm.SVC(**args)
    
    clf.fit(Xtrain, Ytrain) #训练模型
    
    prediction = clf.predict(Xtest)  # 预测测试集
    
    global count
    count = count + 1
    score = accuracy_score(Ytest, prediction)
    print('[{0}], Test acc: {1})'.format(str(count), score))
    
    return -score


best = fmin(func,parameter_space_svc ,algo=tpe.suggest,max_evals=100)
kernel_list = ['rbf', 'poly']
best['kernel'] = kernel_list[best['kernel']]
print('best params:',best)


best_params_ = {'C':831.1011250415439, 'gamma':101.90409101969092}


clf = SVC(C = best_params_['C'], kernel = 'rbf', gamma = best_params_['gamma'], 
          decision_function_shape = 'ovr', cache_size=5000 # MB
         ).fit(Xtrain,Ytrain)
print(clf.score(Xtrain, Ytrain)) # 训练集

score_r = clf.score(Xtest, Ytest)
print(score_r) # 测试集

clf = clf.fit(data_X_know, data_Y_know)
print(clf.score(data_X_know, data_Y_know))
print(clf.predict(data_X_unknow))

  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
根据引用和引用的内容,2021数学建模国赛C第二问的关键在于使用多次循环遗传算法和设置初始值,以及通过画图深入分析每周的库存和订购方案是否对材料有偏好等。建议使用spss时间序列专家建模器来进行建模,但由于订购商和转运商数量较多,处理时间可能较长。 另外,根据引用的内容,处理该问可以采用topsis评分方法,通过目所给的数据来确定权重,然后套用相应的公式进行计算。这个问的处理难度较低,无需调用matlab包来完成。<span class="em">1</span><span class="em">2</span><span class="em">3</span> #### 引用[.reference_title] - *1* [2021数学建模国赛C二详细思路和代码](https://blog.csdn.net/qq_35712832/article/details/121931455)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v93^chatsearchT3_1"}}] [.reference_item style="max-width: 50%"] - *2* *3* [数学建模2021C总结](https://blog.csdn.net/yxycogo/article/details/126790272)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v93^chatsearchT3_1"}}] [.reference_item style="max-width: 50%"] [ .reference_list ]

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Python机器学习AI

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值