scanpy find resolution

from sklearn.metrics.cluster import adjusted_rand_score,normalized_mutual_info_score
import numpy as np 
################ Functions #############
# Find optimal resolution with given n_clusters
#########################
############### Attention ###############
# if plot=True,you should calculate umap in advance(sc.tl.umap)
#########################################
def find_resolution(adata_, n_clusters, random,plot=True): 
    print("you set n_cluster={}".format(n_clusters))
    adata = adata_.copy()
    obtained_clusters = -1
    iteration = 0
    resolutions = [0., 1000.]
    while obtained_clusters != n_clusters and iteration < 50:
        current_res = sum(resolutions)/2
        sc.tl.louvain(adata, resolution = current_res, random_state = random)
        labels = adata.obs['louvain']
        obtained_clusters = len(np.unique(labels))

        if obtained_clusters < n_clusters:
            resolutions[0] = current_res
        else:
            resolutions[1] = current_res
        iteration = iteration + 1
    final_cluster=len(np.unique(adata.obs['louvain']))
    if(final_cluster==n_clusters):
        print("Find Correctly")
    else:
        print("====Find uncorrectly ======")
    if(plot):
        sc.tl.louvain(adata,resolution=current_res)
        ARI= adjusted_rand_score(adata.obs["celltype"], adata.obs["louvain"])
        NMI= normalized_mutual_info_score(adata.obs["celltype"], adata.obs["louvain"])
        print("louvain clustering result(resolution={}):n_cluster={}".format(current_res,final_cluster))
        sc.pl.umap(adata,color=["louvain","celltype"],title=["resolution={},ARI={},NMI={}".format(np.round(current_res,3),ARI,NMI),"celltype"],save="_opti_resolution.png")
    return current_res

import scanpy as sc 
adata=sc.datasets.blobs()
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.pl.umap(adata,color=["blobs"])

在这里插入图片描述

adata.obs["celltype"]=adata.obs["blobs"].copy()
reso=find_resolution(adata,n_clusters=5,random=0)

在这里插入图片描述

load_digit数据集

from scipy.optimize import linear_sum_assignment
import matplotlib.pyplot as plt

def cluster_acc(y_true, y_pred):
    """
    This is code from original repository.
    Calculate clustering accuracy. Require scipy installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1

    ind = linear_sum_assignment(w.max() - w)

    accuracy = 0
    for idx in range(len(ind[0]) - 1):
        i = ind[0][idx]
        j = ind[1][idx]
        accuracy += w[i, j]
    accuracy = accuracy * 1.0 / y_pred.size
    return accuracy

import umap
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.preprocessing import MinMaxScaler
X,y_train=load_digits(return_X_y=True)
X = MinMaxScaler().fit_transform(X)
reducer=umap.UMAP(random_state=0)
X_transformed=reducer.fit_transform(X)
target=y_train.copy()
fig=plt.figure(figsize=(10,8))
for label in np.unique(target):
    plt.scatter(X_transformed[label==target,0], X_transformed[label==target,1],label=label)
plt.legend(loc="upper left")
plt.title("true_label")
plt.show()


kmeans = KMeans(n_clusters=10, init='k-means++', random_state=1,n_init=40)
# 这个和随机种子是有一定的差距的
pred_y = kmeans.fit_predict(X)

acc=cluster_acc(y_train,pred_y)
print("acc=",acc)
from sklearn import metrics
NMI=metrics.normalized_mutual_info_score(y_train, pred_y)
print("NMI=",NMI)

target=pred_y
fig=plt.figure(figsize=(10,8))
for label in np.unique(target):
    plt.scatter(X_transformed[label==target,0], X_transformed[label==target,1],label=label)
plt.legend(loc="upper left")
plt.title("Kmeans_pred")
plt.show()

在这里插入图片描述在这里插入图片描述# load_digit scanpy测试

from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.preprocessing import MinMaxScaler
X,y_train=load_digits(return_X_y=True)
X = MinMaxScaler().fit_transform(X)

adata=sc.AnnData(X=X)
adata.obs['celltype']=y_train.astype(str)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.pl.umap(adata,color=["celltype"])

在这里插入图片描述reso=find_resolution(adata,n_clusters=10,random=0)

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值