将聚类结果保存在json文件中

最新推荐文章于 2024-02-23 11:52:02 发布

DeniuHe

最新推荐文章于 2024-02-23 11:52:02 发布

阅读量418

点赞数

分类专栏：算法　文章标签：聚类 python 机器学习

本文链接：https://blog.csdn.net/DeniuHe/article/details/124531881

版权

算法专栏收录该内容

193 篇文章

订阅专栏

import json
import xlwt
import xlrd
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from time import time
from collections import OrderedDict


def _closest_member_per_cluster(points, labels, centroids):
    """Return, for every cluster, the row of *points* nearest to its centroid.

    Args:
        points: 2-D array of the samples that were clustered.
        labels: 1-D array giving the cluster label of each row of *points*.
        centroids: 2-D array with one centroid per cluster label.

    Returns:
        Integer array of length ``len(centroids)``. Entry ``k`` is a row index
        into *points* (not into any larger matrix *points* was sliced from).
    """
    closest = np.zeros(len(centroids), dtype=int)
    for lab, centroid in enumerate(centroids):
        member_ids = np.where(labels == lab)[0]
        if member_ids.size == 0:
            # NOTE(review): KMeans is not expected to return an empty cluster;
            # if it ever does, the entry keeps its initial value of 0.
            continue
        dists = np.linalg.norm(points[member_ids] - centroid, axis=1)
        # Scan from the end so that exact ties resolve to the highest index.
        last_min = member_ids.size - 1 - np.argmin(dists[::-1])
        closest[lab] = member_ids[last_min]
    return closest


names_list = ["HDI2"]

# Raw strings: "\O" and "\D" are not valid escape sequences, so the non-raw
# spelling only works by accident and triggers a warning on recent Pythons.
data_path = Path(r"D:\OCdata")
part_path = Path(r"E:\DDDDD_Result\DataPartitions")

for name in names_list:
    path = data_path.joinpath(name + ".csv")
    print("########################{}".format(path))
    data = np.array(pd.read_csv(path, header=None))
    scaler = StandardScaler()
    # Every column except the last is a feature; the last column is the label.
    X = scaler.fit_transform(np.asarray(data[:, :-1], np.float64))
    nClass = len(np.unique(data[:, -1]))
    workbook = xlwt.Workbook()
    # -----------------------------
    read_path = str(part_path.joinpath(name + ".xls"))
    book = xlrd.open_workbook(read_path)
    # Both mappings are keyed by sheet name first and by n_clusters second, so
    # the results of one sheet are not overwritten by the next one.
    Label = OrderedDict()
    Center = OrderedDict()
    for SN in book.sheet_names():
        S_Time = time()
        table = book.sheet_by_name(SN)
        # Keep only the numeric cells of the first column (xlrd hands numbers
        # back as floats); anything else in that column is skipped.
        pool_idx = np.array(
            [int(cell) for cell in table.col_values(0) if isinstance(cell, float)],
            dtype=int,
        )
        X_pool = X[pool_idx]

        sheet = workbook.add_sheet("{}".format(SN))
        sheet_labels = OrderedDict()
        sheet_centers = OrderedDict()
        column = 0
        for n_clusters in range(nClass + 1, 11 * nClass + 2):
            kmeans = KMeans(n_clusters=n_clusters)
            kmeans.fit(X=X_pool)
            # --------------store the clustering labels-----------------
            # tolist() converts NumPy integers to plain ints, which is what
            # json.dumps needs (NumPy integer scalars are rejected).
            labels = kmeans.labels_.tolist()
            sheet_labels[n_clusters] = labels
            for row, lab in enumerate(labels):
                sheet.write(row, column, lab)
            column += 1
            # ---------------Store the clustering centers idx----------------
            # Distances are measured inside X_pool because labels_ is aligned
            # with the rows KMeans was fitted on, not with the full X. The
            # stored values are therefore positions within the pool.
            center = _closest_member_per_cluster(
                X_pool, kmeans.labels_, kmeans.cluster_centers_
            ).tolist()
            sheet_centers[n_clusters] = center
            for row, member in enumerate(center):
                sheet.write(row, column, member)
            column += 1
            # ---------------------------------------------------------------
        Label[SN] = sheet_labels
        Center[SN] = sheet_centers
        print("SN:", SN, "Time:", time() - S_Time)

    # NOTE(review): Center is collected but only Label is serialized to JSON.
    b = json.dumps(Label)
    with open("{}".format(name), "w") as f:
        f.write(b)

    # Persist the spreadsheet that was filled in above; without this call the
    # workbook only ever exists in memory.
    save_path = Path(r"E:\DDDDD_Result\KmeansResult")
    save_path = str(save_path.joinpath(name + ".xls"))
    workbook.save(save_path)