模式识别(七)聚类算法(模糊聚类Cmeans)识别细胞数据集

import os
import cv2 as cv
import numpy as np
import time
from sklearn.cluster import KMeans as Kmn
from sklearn.metrics.cluster import contingency_matrix
from sklearn.decomposition import PCA
from skfuzzy.cluster import cmeans as Cmn

def purity(cluster, labels, label_set, k = 3):
    """Return the purity of a clustering with respect to ground-truth labels.

    A ``k x len(label_set)`` count table is built (rows are cluster ids,
    columns are positions in ``label_set``). Purity is the number of samples
    that belong to the majority label of their cluster, divided by the total
    number of samples.

    Args:
        cluster: Sequence of cluster ids, each convertible to an int in
            ``range(k)``.
        labels: Sequence of ground-truth labels, aligned with ``cluster``.
        label_set: Sequence of all distinct labels that may occur in
            ``labels``.
        k: Number of clusters (rows of the count table).

    Returns:
        Purity as a float in ``[0, 1]``; ``0.0`` when ``labels`` is empty.

    Raises:
        ValueError: If a label does not occur in ``label_set``.
        IndexError: If a cluster id is outside ``range(k)``.
    """
    total = len(labels)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    label_set = list(label_set)
    counts = np.zeros((k, len(label_set)))
    for cluster_id, label in zip(cluster, labels):
        counts[int(cluster_id), label_set.index(label)] += 1
    # Sum each cluster's majority count. The builtin sum() over the 2-D table
    # would only add rows together and return a per-label vector.
    return float(counts.max(axis=1).sum() / total)
def purity_score(y_true, y_pred):
    """Return the purity of the assignment ``y_pred`` against ``y_true``.

    The contingency matrix of the two labelings is built, the largest entry
    of every column (``axis=0``) is taken, and the sum of those maxima is
    divided by the sum of all entries.
    """
    table = contingency_matrix(y_true, y_pred)
    # Largest count in each column of the contingency table.
    dominant_counts = np.max(table, axis=0)
    return np.sum(dominant_counts) / np.sum(table)
def labels_to_original(labels, forclusterlist):
    """Regroup ``forclusterlist`` so items with the same cluster label are adjacent.

    Items are emitted cluster by cluster: first everything labelled ``0``,
    then ``1``, ... up to the largest label, and finally everything labelled
    ``-1``. Inside one cluster the original relative order is kept.

    Args:
        labels: Sequence of integer cluster labels (``-1`` or ``>= 0``).
        forclusterlist: Sequence of items aligned with ``labels``.

    Returns:
        A numpy array holding the items of ``forclusterlist`` in grouped
        order; an empty array when the input is empty.

    Raises:
        ValueError: If the two sequences differ in length, or a label is
            neither ``-1`` nor in ``0..max(labels)``.
    """
    if len(labels) != len(forclusterlist):
        # Explicit check instead of assert, which is stripped under -O.
        raise ValueError(
            'labels and forclusterlist must have the same length '
            f'({len(labels)} != {len(forclusterlist)})'
        )
    if len(labels) == 0:
        # max() would raise on an empty sequence.
        return np.array([])

    max_label = max(labels)
    # Map every admissible label to its output bucket; -1 goes last.
    slot_of = {value: slot for slot, value in enumerate(range(0, max_label + 1))}
    slot_of[-1] = len(slot_of)

    buckets = [[] for _ in slot_of]
    for label, item in zip(labels, forclusterlist):
        try:
            slot = slot_of[label]
        except KeyError:
            raise ValueError(f'unexpected cluster label: {label!r}') from None
        buckets[slot].append(item)

    return np.array([item for bucket in buckets for item in bucket])
def get_path(filepath):
    """Collect every file found under ``filepath``.

    The tree is walked with ``os.walk(..., topdown=False)``.

    Returns:
        A pair ``(names, full_paths)`` of equally long lists: the bare file
        names and the corresponding paths joined with their directory.
    """
    found = [
        (fname, os.path.join(folder, fname))
        for folder, _, fnames in os.walk(filepath, topdown=False)
        for fname in fnames
    ]
    names = [fname for fname, _ in found]
    full_paths = [full for _, full in found]
    return names, full_paths
def read_file(path,filelist, filedirs, n_component = 1, Use_smote = False):
    """Load the cell images, flatten them and reduce them with PCA.

    Every file in ``filedirs`` is read as a single-channel grayscale image,
    flattened and scaled by 1/255. Only images with exactly ``51 * 51``
    pixels are kept. The class of an image is derived from the part of its
    file name before the first ``_``.

    Args:
        path: Unused; kept so existing callers keep working.
        filelist: File names, aligned one-to-one with ``filedirs``.
        filedirs: Full paths of the files to read.
        n_component: Value passed straight to ``PCA``.
        Use_smote: When truthy, oversample the PCA output with SMOTE.

    Returns:
        ``(features, classes)``: the PCA-transformed data and the integer
        class of every kept image (after SMOTE resampling when requested).

    Raises:
        KeyError: If a kept image has a file-name prefix that is not a
            known class.
        ValueError: If no usable 51x51 image was found.
    """
    dic ={'DCIS':0, 'IDC':0, 'Muc':0, 'ILC':0, 'MC':0,
          'normal':1, 
          'TIL':2}
    datalist = []
    claslist = []
    # Iterate names and paths in lockstep instead of looking each name up
    # with filedirs.index(), which is quadratic and ambiguous on duplicates.
    for name, file_path in zip(filelist, filedirs):
        img = cv.imread(file_path, 0)  # read as single-channel grayscale
        if img is None:
            # imread yields None for files it cannot decode; skip them.
            continue
        vec = img.flatten() / 255
        if len(vec) != 51*51:
            continue
        clas = name.split('_')[0]  # text before the first '_', e.g. 'DCIS'
        datalist.append(vec)
        claslist.append(dic[clas])
    if not datalist:
        raise ValueError('no readable 51x51 images found in filedirs')
    claslist = np.array(claslist)
    datalist = np.array(datalist)
    pca = PCA(n_component)
    datalist_n = pca.fit_transform(datalist)
    if Use_smote:
        # NOTE(review): SMOTE is not imported anywhere in the visible file, so
        # this branch raises NameError as written. Presumably it is
        # imblearn.over_sampling.SMOTE; confirm, and check whether the
        # installed version still provides fit_sample.
        smo = SMOTE(random_state = 40)
        datalist_smo, claslist_smo = smo.fit_sample(datalist_n, claslist)
        return datalist_smo, claslist_smo
    return datalist_n, claslist
def _score_clustering(claslist, cluster_labels):
    """Return ``(accuracy in percent, purity)`` for one cluster assignment.

    Cluster ids are arbitrary, so they cannot be compared to class ids
    directly. Accuracy is computed after matching clusters to classes
    one-to-one such that the number of agreeing samples is maximal. Purity
    is delegated to ``purity_score``.
    """
    from scipy.optimize import linear_sum_assignment

    table = contingency_matrix(claslist, cluster_labels)
    # linear_sum_assignment minimises cost, so negate to maximise agreement.
    class_idx, cluster_idx = linear_sum_assignment(-table)
    acc = table[class_idx, cluster_idx].sum() / table.sum() * 100
    pur = purity_score(claslist, cluster_labels)
    return acc, pur


def clustering_Kmn(datalist, claslist):
    """Cluster ``datalist`` into 3 groups with K-means and score the result.

    Args:
        datalist: Feature matrix handed to ``KMeans.fit``.
        claslist: Ground-truth class of every sample in ``datalist``.

    Returns:
        ``(acc, pur)``: accuracy in percent under the best one-to-one
        cluster-to-class matching, and the purity of the clustering.
    """
    clustering = Kmn(n_clusters = 3,  random_state = 9).fit(datalist)
    # Score the actual assignments, not a reordered copy of the ground truth.
    return _score_clustering(claslist, clustering.labels_)


def clustering_Cmn(datalist, claslist):
    """Cluster ``datalist`` into 3 groups with fuzzy C-means and score the result.

    The data is transposed before it is passed to ``cmeans``. Every sample
    is assigned to the cluster with the highest membership value.

    Args:
        datalist: Feature matrix; its transpose is passed to ``cmeans``.
        claslist: Ground-truth class of every sample in ``datalist``.

    Returns:
        ``(acc, pur)``: accuracy in percent under the best one-to-one
        cluster-to-class matching, and the purity of the clustering.
    """
    datalist = datalist.T
    center, u, u0, d, jm, p, fpc = Cmn(datalist, m=2, c=3, error=0.005, maxiter=1000)
    # Harden the fuzzy memberships: pick the strongest cluster along axis 0.
    label = np.argmax(u, axis=0)
    return _score_clustering(claslist, label)

# Driver: load the data set, run both clusterings, report scores and runtime.
time_start = time.time()
path = 'D:\\STUDYFILE\\RUN\\cells\\train'
n_components = 0.3
# Values below 1 are passed to PCA unchanged and are not reported here
# (presumably PCA treats them as a variance fraction; confirm).
if n_components >= 1:
    print('PCA降维至:', n_components)

filepath, filedirs = get_path(path)
datalist, claslist = read_file(path,filepath, filedirs, n_components, Use_smote = False)

# Fuzzy C-means first, then K-means, on the same reduced data.
acc1, pur1 = clustering_Cmn(datalist, claslist)
acc2, pur2 = clustering_Kmn(datalist, claslist)

print('Cmeans聚类准确率为:', acc1, '% ,Cmeans聚类纯度为:', pur1, ';')
print('Kmeans聚类准确率为:', acc2, '% ,Kmeans聚类纯度为:', pur2, ';')
time_end = time.time()
print('耗时:', time_end-time_start, 's')
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值