这个Paper结合BOE之间的距离的思想,对Murphy的方法进行了改进
代码注解:虽然代码写得还是一如既往的臭,仅仅只是为了获得运行结果而写,但还是基本复现了Paper。
import pandas as pd
import numpy as np
def focal_Element(boe):
    """Return the focal elements of a body of evidence as a set.

    Args:
        boe: Mapping from focal-element label to its mass.

    Returns:
        A new ``set`` containing every key of ``boe``; the mapping itself
        is not modified.
    """
    # ``set`` consumes the key view directly, so no intermediate list is needed.
    return set(boe.keys())
def setUnion(boeI, boeJ):
    """Return the union of the focal elements of two bodies of evidence.

    Args:
        boeI: First mapping from focal-element label to mass.
        boeJ: Second mapping from focal-element label to mass.

    Returns:
        A set holding every label that appears in either mapping.
    """
    return focal_Element(boeI) | focal_Element(boeJ)
def columnVector(boeI, boeJ):
    """Tabulate two bodies of evidence side by side over their joint labels.

    Args:
        boeI: First mapping from focal-element label to mass.
        boeJ: Second mapping from focal-element label to mass.

    Returns:
        A NumPy array whose first row is the header
        ``["key", "boeI", "boeII"]`` and whose remaining rows are
        ``[label, mass in boeI, mass in boeJ]``, with 0 used for a label
        that a mapping does not contain. Row order follows the iteration
        order of the label union.
    """
    # NOTE(review): the rows mix text and numbers, so NumPy presumably stores
    # every cell as text; callers convert masses back with float().
    rows = [["key", "boeI", "boeII"]]
    for label in setUnion(boeI, boeJ):
        mass_i = boeI[label] if label in boeI.keys() else 0
        mass_j = boeJ[label] if label in boeJ.keys() else 0
        rows.append([label, mass_i, mass_j])
    return np.array(rows)
def vectorDifference(boeI, boeJ):
    """Return the per-label mass difference ``boeI - boeJ`` as a table.

    Args:
        boeI: First mapping from focal-element label to mass.
        boeJ: Second mapping from focal-element label to mass.

    Returns:
        A two-column NumPy array. Row 0 is the header ``["key", "boe"]``;
        every following row is ``[label, mass_I - mass_J]`` in the row order
        produced by ``columnVector``.
    """
    table = columnVector(boeI, boeJ)
    # Indexing with a tuple of columns yields a copy, so writing into
    # ``diff`` leaves ``table`` untouched.
    diff = table[:, (0, 1)]
    # Both mass columns come from the same table, so row i of each refers to
    # the same label; one pass replaces the former nested key search.
    for row in range(1, diff.shape[0]):
        diff[row, 1] = float(table[row, 1]) - float(table[row, 2])
    diff[0, 1] = "boe"
    return diff
def initMatrixD(boeI, boeJ):
    """Build an empty, labelled square table over the joint focal elements.

    Args:
        boeI: First mapping from focal-element label to mass.
        boeJ: Second mapping from focal-element label to mass.

    Returns:
        A NumPy array whose first row is ``"0"`` followed by the labels,
        whose first column repeats those labels, and whose remaining cells
        are all 0.
    """
    diff = vectorDifference(boeI, boeJ)
    size, _ = diff.shape
    labels = {diff[row, 0] for row in range(1, size)}

    header = ["0", *labels]
    grid = [header]
    for row in range(1, size):
        # Row label followed by one zero per label column.
        grid.append([header[row]] + [0] * (size - 1))
    return np.array(grid)
def matrixD(boeI, boeJ):
    """Compute the pairwise set-overlap matrix of the joint focal elements.

    Each label is treated as a set of its characters; entry (i, j) is
    ``|A_i ∩ A_j| / |A_i ∪ A_j|`` with labels taken in sorted order.

    Args:
        boeI: First mapping from focal-element label to mass.
        boeJ: Second mapping from focal-element label to mass.

    Returns:
        A NumPy array of the overlap ratios, or ``None`` (after printing a
        message) when the labelled table is not square.
    """
    grid = initMatrixD(boeI, boeJ)
    n_rows, n_cols = grid.shape
    # Sort rows by the label column, then columns by the header row.
    grid = grid[grid[:, 0].argsort()]
    grid = grid[:, grid[0].argsort()]
    if n_rows != n_cols:
        print("初始化矩阵出现错误")
        return

    row_sets = [set(grid[r, 0][:]) for r in range(1, n_rows)]
    col_sets = [set(grid[0, c][:]) for c in range(1, n_rows)]
    overlap = [
        [len(col & row) / len(col | row) for col in col_sets]
        for row in row_sets
    ]
    return np.array(overlap)
def getDistance(boeI, boeJ):
    """Return the distance ``sqrt(0.5 * d^T D d)`` between two BOEs.

    ``d`` is the per-label mass difference from ``vectorDifference`` and
    ``D`` is the overlap matrix from ``matrixD``; both are ordered by sorted
    label so that they line up.

    Args:
        boeI: First mapping from focal-element label to mass.
        boeJ: Second mapping from focal-element label to mass.

    Returns:
        The distance as a floating-point scalar.
    """
    table = vectorDifference(boeI, boeJ)
    body = table[1:, :]  # drop the header row
    body = body[body[:, 0].argsort()]
    count = body.shape[0]

    # The table stores its cells as text, so convert the difference column.
    diff = np.array([float(cell) for cell in body[:, -1]]).reshape(count, 1)
    overlap = matrixD(boeI, boeJ)

    quad = np.dot(diff.reshape(1, count), overlap).dot(diff)
    half_form = (1 / 2) * quad[0][0]
    # Round-off can push a (near-)zero quadratic form slightly below zero,
    # which would turn the square root into NaN; clamp it instead.
    if half_form < 0:
        half_form = np.float64(0.0)
    return pow(half_form, 1 / 2)
def getSimilar(boeI, boeJ):
    """Return the similarity of two BOEs, defined as ``1 - getDistance``.

    Args:
        boeI: First mapping from focal-element label to mass.
        boeJ: Second mapping from focal-element label to mass.

    Returns:
        One minus the distance between the two mappings.
    """
    distance = getDistance(boeI, boeJ)
    return 1 - distance
def SMM(n, *args):
    """Build the ``n`` x ``n`` similarity matrix of the given BOEs.

    Args:
        n: Number of bodies of evidence to compare.
        *args: The bodies of evidence; the first ``n`` are used.

    Returns:
        A float NumPy array whose entry (i, j) is
        ``getSimilar(args[i], args[j])``.
    """
    matrix = np.zeros((n, n), dtype=float)
    for row in range(n):
        first = args[row]
        for col in range(n):
            matrix[row, col] = getSimilar(first, args[col])
    return matrix
def Crdi(n, *args):
    """Compute the credibility degree of each body of evidence.

    The support of BOE ``i`` is the sum of its similarities to every other
    BOE; its credibility is that support divided by the total support.

    Args:
        n: Number of bodies of evidence; must be at least 1.
        *args: The bodies of evidence.

    Returns:
        A dict mapping ``"m1"``, ``"m2"``, ... to the credibility of the
        corresponding BOE. When the total support is zero (for example with
        a single BOE) every BOE receives the uniform weight ``1 / n``.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    # Validate before doing any work, and raise instead of terminating the
    # interpreter with a success exit status.
    if n < 1:
        raise ValueError("Crdi 错误")

    smm = SMM(n, *args)
    support = {}
    for i in range(n):
        support["m" + str(i + 1)] = sum(
            (smm[i, j] for j in range(n) if j != i), 0.0
        )

    total = sum(support.values(), 0.0)
    if total == 0:
        # No BOE supports any other one: avoid dividing by zero.
        return {name: 1.0 / n for name in support}
    return {name: value / total for name, value in support.items()}
def MAE(n, *args):
    """Return the credibility-weighted average of the given BOEs.

    Args:
        n: Number of bodies of evidence, forwarded to ``Crdi``.
        *args: The bodies of evidence (mappings from label to mass). They
            are only read; missing labels are treated as mass 0.

    Returns:
        A dict, keyed by the sorted union of all labels, holding the sum of
        each BOE's mass for that label multiplied by its credibility. BOEs
        without a credibility entry contribute nothing.
    """
    weights = Crdi(n, *args)
    labels = sorted(set().union(*(focal_Element(boe) for boe in args)))

    averaged = {}
    for label in labels:
        total = 0.00
        for index, boe in enumerate(args, start=1):
            weight_name = "m" + str(index)
            if weight_name in weights:
                # ``get`` supplies the zero mass without writing it back into
                # the caller's mapping.
                total += boe.get(label, 0) * weights[weight_name]
        averaged[label] = total
    return averaged
def computeEmpty(m_1, m_2):
    """Return the conflict mass between two bodies of evidence.

    Each label is treated as the set of its characters. The result is the
    sum of ``m_1[A] * m_2[B]`` over every pair of labels whose character
    sets do not intersect.

    Args:
        m_1: First mapping from focal-element label to mass.
        m_2: Second mapping from focal-element label to mass.

    Returns:
        The accumulated conflict mass (0.0 when there are no such pairs).
    """
    # Walk the mappings themselves rather than sets of their keys so that
    # the floating-point summation order is the same on every run.
    members_2 = [(frozenset(label), mass) for label, mass in m_2.items()]

    conflict = 0.00
    for label_1, mass_1 in m_1.items():
        members_1 = frozenset(label_1)
        for set_2, mass_2 in members_2:
            if members_1.isdisjoint(set_2):
                conflict += mass_1 * mass_2
    return conflict
def computeInter(m_1, m_2):
    """Return the unnormalised conjunctive combination of two BOEs.

    Each label is treated as the set of its characters. For every pair of
    labels with a non-empty intersection, ``m_1[A] * m_2[B]`` is added to
    the mass of that intersection.

    Args:
        m_1: First mapping from focal-element label to mass.
        m_2: Second mapping from focal-element label to mass.

    Returns:
        A dict from label to combined mass. Every label of ``m_2`` is
        present (possibly with mass 0.0). An intersection that is not a
        label of ``m_2`` is reported under the matching label of ``m_1``
        or, failing that, under its characters joined in sorted order,
        instead of being dropped. Pairs with an empty intersection are
        skipped.
    """
    # Canonical label for each character set; labels of m_2 take precedence
    # so existing output keys are preserved.
    label_of = {}
    for label in list(m_2.keys()) + list(m_1.keys()):
        label_of.setdefault(frozenset(label), label)

    combined = {label: 0.0 for label in m_2.keys()}
    for label_1, mass_1 in m_1.items():
        members_1 = frozenset(label_1)
        for label_2, mass_2 in m_2.items():
            common = members_1 & frozenset(label_2)
            if not common:
                continue
            target = label_of.get(common)
            if target is None:
                target = "".join(sorted(common))
                label_of[common] = target
            combined[target] = combined.get(target, 0.0) + mass_1 * mass_2
    return combined
def weightAvg(n, avgSet):
    """Combine the averaged BOE with itself ``n - 1`` times.

    Each iteration combines the running result with ``avgSet`` via
    ``computeInter``, rescales by ``1 / (1 - conflict)`` using
    ``computeEmpty``, rounds every mass to four decimals and prints the
    intermediate result.

    Args:
        n: Number of original bodies of evidence; ``n - 1`` combinations
            are performed.
        avgSet: The averaged BOE (mapping from label to mass).

    Returns:
        The BOE after the last iteration, or ``avgSet`` itself when no
        iteration runs.
    """
    current = avgSet
    for step in range(1, n):
        combined = computeInter(current, avgSet)
        scale = 1 / (1 - computeEmpty(current, avgSet))
        for label in combined:
            combined[label] = round(combined[label] * scale, 4)
        print("第 {0} 次迭代结果:{1}".format(step, combined))
        current = combined
    return current
if __name__ == '__main__':
    # Five sample bodies of evidence; the third and fourth are identical.
    evidence = [
        {"A": 0.5, "B": 0.2, "C": 0.3},
        {"A": 0, "B": 0.9, "C": 0.1},
        {"A": 0.55, "B": 0.1, "AC": 0.35},
        {"A": 0.55, "B": 0.1, "AC": 0.35},
        {"A": 0.6, "B": 0.1, "AC": 0.3},
    ]
    n = len(evidence)
    # Average the evidence by credibility, then combine it n - 1 times.
    avgSet = MAE(n, *evidence)
    weightAvg(n, avgSet)