mclust学习总结

我的心永远是冰冰哒

已于 2023-11-09 21:37:30 修改

阅读量555

点赞数

文章标签：学习 python 开发语言

于 2023-08-14 20:41:05 首次发布

本文链接：https://blog.csdn.net/qq_45759229/article/details/132285028

版权

mclust example1

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs
X, y = make_blobs(n_samples=300, centers=4, cluster_std=0.60, random_state=0)
# n_features=2是默认的
plt.scatter(X[:,0], X[:,1])
plt.show()

def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """\
    Clustering using the mclust algorithm.
    The parameters are the same as those in the R package mclust.
    """
    
    np.random.seed(random_seed)
    import rpy2.robjects as robjects
    robjects.r.library("mclust")

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()
    r_random_seed = robjects.r['set.seed']
    r_random_seed(random_seed)
    rmclust = robjects.r['Mclust']

    res = rmclust(rpy2.robjects.numpy2ri.numpy2rpy(features), num_cluster, modelNames)
    mclust_res = np.array(res[-2])

    return mclust_res.astype(int)
                  
label_mclust = mclust(X, num_cluster=4)

from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import normalized_mutual_info_score
print("ARI = {}".format(adjusted_rand_score(label_mclust,y)))
print("NMI = {}".format(normalized_mutual_info_score(label_mclust,y)))

pred_y = label_mclust.copy()
fig=plt.figure()
for label in np.unique(pred_y):
    plt.scatter(X[label==pred_y,0], X[label==pred_y,1],label=label)

plt.show()

结果如下
在这里插入图片描述

mclust (example 2)

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs

def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """\
    Clustering using the mclust algorithm.
    The parameters are the same as those in the R package mclust.
    """
    
    np.random.seed(random_seed)
    import rpy2.robjects as robjects
    robjects.r.library("mclust")

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()
    r_random_seed = robjects.r['set.seed']
    r_random_seed(random_seed)
    rmclust = robjects.r['Mclust']

    res = rmclust(rpy2.robjects.numpy2ri.numpy2rpy(features), num_cluster, modelNames)
    mclust_res = np.array(res[-2])

    return mclust_res.astype(int)
                  
X, y = make_blobs(n_samples=1000, n_features=50,centers=5, random_state=0)
print(X.shape)

label_mclust = mclust(X, num_cluster=5)

from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import normalized_mutual_info_score
print("ARI = {}".format(adjusted_rand_score(label_mclust,y)))
print("NMI = {}".format(normalized_mutual_info_score(label_mclust,y)))

在这里插入图片描述

mclust问题

stagate的默认代码是对30dim的数据进行聚类，所以并不出现问题

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
import os
import sys
from sklearn.metrics.cluster import adjusted_rand_score
import STAGATE

#####################################################
data_root='/DATA2/zhangjingxiao/yxk/dataset/LIBD/'
section_id = '151676'
save_root="./LIBD_result/"
#####################################################

input_dir = os.path.join(data_root, section_id)
adata = sc.read_visium(path=input_dir, count_file='filtered_feature_bc_matrix.h5')
adata.var_names_make_unique()
print(adata)
#Normalization
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=3000)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.tl.pca(adata)

# read the annotation
Ann_df = pd.read_csv(data_root+"/"+section_id+ "/" + section_id +"_layer_guess.csv",index_col=0)
del Ann_df["barcode"]
del Ann_df["sample_name"]
Ann_df.columns = ['Ground Truth']
#Ann_df
adata.obs['Ground Truth'] = Ann_df.loc[adata.obs_names, 'Ground Truth']

在这里插入图片描述

def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """\
    Clustering using the mclust algorithm.
    The parameters are the same as those in the R package mclust.
    """
    
    np.random.seed(random_seed)
    import rpy2.robjects as robjects
    robjects.r.library("mclust")

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()
    r_random_seed = robjects.r['set.seed']
    r_random_seed(random_seed)
    rmclust = robjects.r['Mclust']

    res = rmclust(rpy2.robjects.numpy2ri.numpy2rpy(features), num_cluster, modelNames)
    mclust_res = np.array(res[-2])

    return mclust_res.astype(int)
                  
features  = adata.obsm["X_pca"][:,0:30]
label_mclust = mclust(features, num_cluster=7)
print(label_mclust)

在这里插入图片描述但是奇怪的问题出现出现了

features  = adata.obsm["X_pca"]
label_mclust = mclust(features, num_cluster=7)
print(label_mclust)

结果如下
在这里插入图片描述
所以目前只能默认对于50维的数据做不了，慢慢来

我的心永远是冰冰哒

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
2
评论
mclust学习总结

结果如下。
复制链接

扫一扫