fisher聚类算法
其原理建议参阅下面这篇文献。在解法上,这里采用的是比较粗暴的方法:直接枚举所有可能的划分,再根据损失函数找出对应的最优分类情况。这种做法弊端比较大,样本数较多时计算量会急剧膨胀(基本爆炸),因此建议采用 Fisher 的解法。
由于各数据集的格式不尽相同,以下代码不能直接套用,需要根据实际数据进行调整。
import win32com.client as com
import pandas as pd
import os
from matplotlib import pyplot as plt
import numpy as np
import matplotlib as mpl
import math
from datetime import datetime
import matplotlib.dates as mdates
# Global matplotlib settings, applied once at import time.
mpl.rcParams.update({
    # Use the SimHei font so Chinese labels are displayed correctly.
    'font.sans-serif': ['SimHei'],
    # Make the minus sign display correctly in figures.
    'axes.unicode_minus': False,
})
# Module-level list, empty at import time (its users are not visible in
# this part of the file).
temp = list()

# Font settings used for the names of the x/y axes.
font1 = dict(weight='normal', size=15)
font2 = dict(weight='normal', size=12)
################################数据读取################################################
def Num_fileread(dir, file, *, header_rows=12, measur='1', excel_path='data.xls'):
    """Read a ';'-separated measurement file and split it into columns.

    The file is loaded with ``pd.read_table`` (gbk encoding), so its first
    line is consumed as the header; the next ``header_rows`` rows are then
    dropped. Every remaining row is split on ';' into seven fields named
    'Measur', 'from', 'to', 'greenbus', 'total ', 'social' and ''.
    The last name is empty because each record ends with a ';', which
    produces an empty trailing field.

    Args:
        dir: Directory containing ``file``. The process working directory
            is changed to it and is NOT restored afterwards.
        file: Name of the text file to read, relative to ``dir``.
        header_rows: Number of leading rows (after the header line) to
            discard before the data records start. Defaults to 12.
        measur: Value of the 'Measur' column to select for the second
            return value. Compared as a string. Defaults to '1'.
        excel_path: Where to dump the full table as an Excel sheet named
            'data' (relative paths resolve against ``dir``). Pass ``None``
            to skip the export. Defaults to 'data.xls'.
            NOTE(review): writing the legacy .xls format needs an xls
            writer engine, which recent pandas releases may no longer
            ship - confirm, or pass an .xlsx path / ``None``.

    Returns:
        A tuple ``(df, df1)``: ``df`` is the full seven-column table (all
        values are strings); ``df1`` holds the columns at positions 2-5
        ('to', 'greenbus', 'total ', 'social') of the rows whose 'Measur'
        equals ``measur``.

    Raises:
        ValueError: Presumably raised by pandas when the file does not
            parse into exactly one column, or a record does not split
            into exactly seven fields (column-count mismatch) - verify
            against the installed pandas version.
    """
    # Kept from the original: later relative paths (input file, Excel
    # dump) resolve against `dir`, and callers may rely on the new cwd.
    os.chdir(dir)

    raw = pd.read_table(file, encoding='gbk')
    # Drop the leading non-data rows and renumber from 0.
    raw = raw[header_rows:].reset_index(drop=True)
    raw.columns = ['DATA']

    df = pd.DataFrame([record.split(';') for record in raw.DATA])
    # The text after the final ';' becomes a column too, hence the '' name.
    df.columns = ['Measur', 'from', 'to', 'greenbus', 'total ', 'social', '']

    if excel_path is not None:
        df.to_excel(excel_path, sheet_name='data')

    # occup.rate (original author's label for this selection)
    # Other measurement ids ('2', '3', '4', ...) can be selected by
    # calling again with a different `measur`.
    df1 = df.iloc[(df['Measur'] == measur).values, [2, 3, 4, 5]]
    return df, df1
def picture1():
fig=plt.figure(1)
ax1=fig.add_subplot(111)
d=df.iloc[(df['Measur']=='1').values,[2]].astype(float)
# print(type(d))
ax1.plot(d,df.iloc[(df['Measur']=='1').values,[4]].astype(float), 'o-', label='road1_total')
ax1.plot(df.iloc[(df['Measur'] == '1').values, [2]].astype(float),df.iloc[(df['Measur'] == '1').values, [3]].astype(float), 'o-', label='road1_bus')
# ax1.plot(df