今天测试一个数据集时,发现 scanpy 画的 t-SNE 图和用 sklearn 画的图有点不一样,排查和解决过程如下。
测试1
from sklearn import datasets
import scanpy as sc
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
# Seed both RNGs so repeated runs are comparable.
random.seed(1)
np.random.seed(1)

# Iris: feature matrix plus integer class labels.
iris = datasets.load_iris()
X, label = iris.data, iris.target

# Author's note: float64 must be requested explicitly here, otherwise the
# scanpy embedding does not match the scikit-learn one.
adata = sc.AnnData(X, dtype="float64")
adata.obs["celltype"] = label.astype(int).astype(str)

# t-SNE through scanpy with use_fast_tsne=False.
# Alternative call tried by the author: sc.tl.tsne(adata, random_state=0)
sc.tl.tsne(adata, random_state=0, use_fast_tsne=False)
axis = sc.pl.tsne(adata, color=["celltype"], size=100, show=False)

# Report the extent (min, then max) of each embedding axis.
sc_tsne = adata.obsm["X_tsne"]
for dim in (0, 1):
    print(np.min(sc_tsne[:, dim]))
    print(np.max(sc_tsne[:, dim]))
print("====================")
# import pickle
# #with open()
# file = open('/Users/xiaokang/Desktop/data/tsne.pkl', 'rb')
# tsne2=pickle.load(file)
# Second run: scikit-learn's TSNE directly on the raw features with random
# initialisation, for comparison with the scanpy embedding above.
target = label
tsne = TSNE(learning_rate=1000, init='random', random_state=0)
X_transformed = tsne.fit_transform(X)

# One scatter series per class so that each class gets its own legend entry.
# The loop variable is deliberately not called `label`, so the label array
# is not rebound while iterating.
fig = plt.figure()
for cls in np.unique(target):
    mask = target == cls
    plt.scatter(X_transformed[mask, 0], X_transformed[mask, 1], label=cls)
plt.legend(loc="upper left")
plt.show()

# Extent (min, then max) of each embedding axis.
print(np.min(X_transformed[:, 0]))
print(np.max(X_transformed[:, 0]))
print(np.min(X_transformed[:, 1]))
print(np.max(X_transformed[:, 1]))
print("==================")
# Third run: scikit-learn's TSNE configured through an explicit parameter
# dict (default initialisation this time).
params_sklearn = dict(
    perplexity=30,
    random_state=0,
    verbose=False,
    early_exaggeration=12,
    learning_rate=1000,
)
from sklearn.manifold import TSNE

# unfortunately, sklearn does not allow to set a minimum number
# of iterations for barnes-hut tSNE
tsne3 = TSNE(**params_sklearn)
X_transformed = tsne3.fit_transform(X)

# One scatter series per class so that each class gets its own legend entry.
fig = plt.figure()
for cls in np.unique(target):
    mask = target == cls
    plt.scatter(X_transformed[mask, 0], X_transformed[mask, 1], label=cls)
plt.legend(loc="upper left")
plt.show()

# Extent (min, then max) of each embedding axis.
print(np.min(X_transformed[:, 0]))
print(np.max(X_transformed[:, 0]))
print(np.min(X_transformed[:, 1]))
print(np.max(X_transformed[:, 1]))
复现digits数据集结果
from sklearn import datasets
import scanpy as sc
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.datasets import load_digits
# Seed both RNGs so repeated runs are comparable.
random.seed(1)
np.random.seed(1)

# Dimensionality reduction on the digits dataset, scanpy side.
X, target = load_digits(return_X_y=True)
label = target.copy()

# Author's note: float64 must be requested explicitly here, otherwise the
# scanpy embedding does not match the scikit-learn one.
adata = sc.AnnData(X, dtype="float64")
adata.obs["Group"] = label.astype(int).astype(str)

# Alternative call tried by the author: sc.tl.tsne(adata, random_state=0)
sc.tl.tsne(adata, random_state=0, use_fast_tsne=False)
axis = sc.pl.tsne(
    adata,
    color=["Group"],
    size=100,
    show=False,
    title="scanpy(TSNE)",
)

# Report the extent (min, then max) of each embedding axis.
sc_tsne = adata.obsm["X_tsne"]
for dim in (0, 1):
    print(np.min(sc_tsne[:, dim]))
    print(np.max(sc_tsne[:, dim]))
print("====================")
# scikit-learn side of the digits comparison: PCA to 50 components followed
# by TSNE, intended to reproduce the scanpy embedding.
# PCA was used below without ever being imported; import it here so this
# section runs.
from sklearn.decomposition import PCA

np.random.seed(1)
random.seed(1)

# Dimensionality reduction on the digits dataset.
X, target = load_digits(return_X_y=True)

# Author's note: do not remove this cast. X is float64 by default, but
# (per the author's investigation) scanpy converts the data to float32
# before running its internal PCA, so the same cast is applied before
# scikit-learn's PCA to keep the final results identical.
X = X.astype("float32")

# Author's note: random_state must be set here, otherwise the result
# differs slightly from scanpy's.
pca = PCA(n_components=50, svd_solver="arpack", random_state=0)
X = pca.fit_transform(X)

tsne = TSNE(random_state=0, learning_rate=1000)
X_transformed = tsne.fit_transform(X)

# One scatter series per digit so that each digit gets its own legend entry.
fig = plt.figure()
for digit in np.unique(target):
    mask = target == digit
    plt.scatter(X_transformed[mask, 0], X_transformed[mask, 1], label=digit)
plt.legend(loc="upper left")
plt.title("Sklearn(TSNE)")
plt.show()

# Extent (min, then max) of each embedding axis.
print(np.min(X_transformed[:, 0]))
print(np.max(X_transformed[:, 0]))
print(np.min(X_transformed[:, 1]))
print(np.max(X_transformed[:, 1]))