mclust学习总结

mclust example1

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs
X, y = make_blobs(n_samples=300, centers=4, cluster_std=0.60, random_state=0)
# n_features=2是默认的
plt.scatter(X[:,0], X[:,1])
plt.show()

def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """\
    Clustering using the mclust algorithm.
    The parameters are the same as those in the R package mclust.
    """
    
    np.random.seed(random_seed)
    import rpy2.robjects as robjects
    robjects.r.library("mclust")

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()
    r_random_seed = robjects.r['set.seed']
    r_random_seed(random_seed)
    rmclust = robjects.r['Mclust']

    res = rmclust(rpy2.robjects.numpy2ri.numpy2rpy(features), num_cluster, modelNames)
    mclust_res = np.array(res[-2])

    return mclust_res.astype(int)
                  
label_mclust = mclust(X, num_cluster=4)

from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import normalized_mutual_info_score
print("ARI = {}".format(adjusted_rand_score(label_mclust,y)))
print("NMI = {}".format(normalized_mutual_info_score(label_mclust,y)))

pred_y = label_mclust.copy()
fig=plt.figure()
for label in np.unique(pred_y):
    plt.scatter(X[label==pred_y,0], X[label==pred_y,1],label=label)

plt.show()

结果如下
在这里插入图片描述
在这里插入图片描述

mclust (example 2)

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs

def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """\
    Clustering using the mclust algorithm.
    The parameters are the same as those in the R package mclust.
    """
    
    np.random.seed(random_seed)
    import rpy2.robjects as robjects
    robjects.r.library("mclust")

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()
    r_random_seed = robjects.r['set.seed']
    r_random_seed(random_seed)
    rmclust = robjects.r['Mclust']

    res = rmclust(rpy2.robjects.numpy2ri.numpy2rpy(features), num_cluster, modelNames)
    mclust_res = np.array(res[-2])

    return mclust_res.astype(int)
                  
X, y = make_blobs(n_samples=1000, n_features=50,centers=5, random_state=0)
print(X.shape)

label_mclust = mclust(X, num_cluster=5)

from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import normalized_mutual_info_score
print("ARI = {}".format(adjusted_rand_score(label_mclust,y)))
print("NMI = {}".format(normalized_mutual_info_score(label_mclust,y)))

在这里插入图片描述

mclust问题

stagate的默认代码是对30dim的数据进行聚类,所以并不出现问题

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
import os
import sys
from sklearn.metrics.cluster import adjusted_rand_score
import STAGATE

#####################################################
data_root='/DATA2/zhangjingxiao/yxk/dataset/LIBD/'
section_id = '151676'
save_root="./LIBD_result/"
#####################################################

input_dir = os.path.join(data_root, section_id)
adata = sc.read_visium(path=input_dir, count_file='filtered_feature_bc_matrix.h5')
adata.var_names_make_unique()
print(adata)
#Normalization
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=3000)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.tl.pca(adata)

# read the annotation
Ann_df = pd.read_csv(data_root+"/"+section_id+ "/" + section_id +"_layer_guess.csv",index_col=0)
del Ann_df["barcode"]
del Ann_df["sample_name"]
Ann_df.columns = ['Ground Truth']
#Ann_df
adata.obs['Ground Truth'] = Ann_df.loc[adata.obs_names, 'Ground Truth']

在这里插入图片描述

def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """\
    Clustering using the mclust algorithm.
    The parameters are the same as those in the R package mclust.
    """
    
    np.random.seed(random_seed)
    import rpy2.robjects as robjects
    robjects.r.library("mclust")

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()
    r_random_seed = robjects.r['set.seed']
    r_random_seed(random_seed)
    rmclust = robjects.r['Mclust']

    res = rmclust(rpy2.robjects.numpy2ri.numpy2rpy(features), num_cluster, modelNames)
    mclust_res = np.array(res[-2])

    return mclust_res.astype(int)
                  
features  = adata.obsm["X_pca"][:,0:30]
label_mclust = mclust(features, num_cluster=7)
print(label_mclust)

在这里插入图片描述但是奇怪的问题出现出现了

features  = adata.obsm["X_pca"]
label_mclust = mclust(features, num_cluster=7)
print(label_mclust)

结果如下
在这里插入图片描述
所以目前只能默认对于50维的数据做不了,慢慢来

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值