# Packages
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.cluster.vq import kmeans2
from sklearn import metrics
from sklearn.cluster import DBSCAN
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Data: read ./data/moonsdatas.csv (relative to the current working
# directory).  The code below uses its X1, X2 and Y columns.
path = os.path.join(os.getcwd(), "data", "moonsdatas.csv")
mydata = pd.read_csv(path)
print(mydata.head())  # quick look at the first rows

# Visualise the data: the left panel ignores the labels, the right panel
# draws the Y == 0 and Y == 1 samples with different markers.
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(mydata.X1, mydata.X2, "ro")
plt.subplot(1, 2, 2)
index0 = np.where(mydata.Y == 0)[0]
index1 = np.where(mydata.Y == 1)[0]
# np.where yields row positions, so select positionally with .iloc instead
# of relying on the index labels happening to equal the positions.
plt.plot(mydata.X1.iloc[index0], mydata.X2.iloc[index0], "r*")
plt.plot(mydata.X1.iloc[index1], mydata.X2.iloc[index1], "bs")
plt.show()
# Packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.vq import kmeans2
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import os
# Data: read ./data/moonsdatas.csv (relative to the current working
# directory).  The code below uses its X1, X2 and Y columns.
path = os.path.join(os.getcwd(), "data", "moonsdatas.csv")
mydata = pd.read_csv(path)
print(mydata.head())  # quick look at the first rows

# Visualise the data: the left panel ignores the labels, the right panel
# draws the Y == 0 and Y == 1 samples with different markers.
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(mydata.X1, mydata.X2, "ro")
plt.subplot(1, 2, 2)
index0 = np.where(mydata.Y == 0)[0]
index1 = np.where(mydata.Y == 1)[0]
# np.where yields row positions, so select positionally with .iloc instead
# of relying on the index labels happening to equal the positions.
plt.plot(mydata.X1.iloc[index0], mydata.X2.iloc[index0], "r*")
plt.plot(mydata.X1.iloc[index1], mydata.X2.iloc[index1], "bs")
plt.show()

# 1. Unsupervised learning -- the Y label is not used.
# Only the first two columns are handed to the clustering algorithms.
features = mydata.iloc[:, :2].to_numpy()

# K-means clustering with two clusters.
# NOTE(review): kmeans2 is not seeded here, so which cluster ends up with
# label 0 (red) or 1 (blue) may differ between runs -- confirm if that matters.
_, k_label = kmeans2(features, k=2, iter=100)

# Density-based clustering.
db = DBSCAN(eps=0.2, min_samples=5)
mydb = db.fit_predict(features)

marker = ["o", "s"]
color = ["r", "b"]


def _style_for(label, palette, noise_style):
    """Return the plot style for one cluster label.

    Negative labels (DBSCAN reports noise as -1) get ``noise_style`` instead
    of silently wrapping around to the last palette entry, and labels beyond
    the palette length cycle through it rather than raising IndexError.
    """
    if label < 0:
        return noise_style
    return palette[label % len(palette)]


# Per-sample marker / colour lists, one entry per row of mydata.
kmarker = [_style_for(lab, marker, "x") for lab in k_label]
kcolor = [_style_for(lab, color, "k") for lab in k_label]
dbmarker = [_style_for(lab, marker, "x") for lab in mydb]
dbcolor = [_style_for(lab, color, "k") for lab in mydb]


def _plot_clusters(title, labels, markers, colors):
    """Scatter X1 against X2 on the current axes, one call per cluster label."""
    x1 = mydata.X1.to_numpy()
    x2 = mydata.X2.to_numpy()
    for label in np.unique(labels):
        members = np.flatnonzero(labels == label)
        # All members of a label share one style; read it from the first.
        first = members[0]
        plt.scatter(x1[members], x2[members],
                    c=colors[first], marker=markers[first])
    plt.title(title)


plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
_plot_clusters("K-means cluster", k_label, kmarker, kcolor)
plt.subplot(1, 2, 2)
_plot_clusters("DBSCAN cluster", mydb, dbmarker, dbcolor)
plt.show()
# Packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.vq import kmeans2
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import os
# Data: read ./data/moonsdatas.csv (relative to the current working
# directory).  The code below uses its X1, X2 and Y columns.
path = os.path.join(os.getcwd(), "data", "moonsdatas.csv")
mydata = pd.read_csv(path)
print(mydata.head())  # quick look at the first rows

# Visualise the data: the left panel ignores the labels, the right panel
# draws the Y == 0 and Y == 1 samples with different markers.
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(mydata.X1, mydata.X2, "ro")
plt.subplot(1, 2, 2)
index0 = np.where(mydata.Y == 0)[0]
index1 = np.where(mydata.Y == 1)[0]
# np.where yields row positions, so select positionally with .iloc instead
# of relying on the index labels happening to equal the positions.
plt.plot(mydata.X1.iloc[index0], mydata.X2.iloc[index0], "r*")
plt.plot(mydata.X1.iloc[index1], mydata.X2.iloc[index1], "bs")
plt.show()

# 1. Unsupervised learning -- the Y label is not used.
# Only the first two columns are handed to the clustering algorithms.
features = mydata.iloc[:, :2].to_numpy()

# K-means clustering with two clusters.
# NOTE(review): kmeans2 is not seeded here, so which cluster ends up with
# label 0 (red) or 1 (blue) may differ between runs -- confirm if that matters.
_, k_label = kmeans2(features, k=2, iter=100)

# Density-based clustering.
db = DBSCAN(eps=0.2, min_samples=5)
mydb = db.fit_predict(features)

marker = ["o", "s"]
color = ["r", "b"]


def _style_for(label, palette, noise_style):
    """Return the plot style for one cluster label.

    Negative labels (DBSCAN reports noise as -1) get ``noise_style`` instead
    of silently wrapping around to the last palette entry, and labels beyond
    the palette length cycle through it rather than raising IndexError.
    """
    if label < 0:
        return noise_style
    return palette[label % len(palette)]


# Per-sample marker / colour lists, one entry per row of mydata.
kmarker = [_style_for(lab, marker, "x") for lab in k_label]
kcolor = [_style_for(lab, color, "k") for lab in k_label]
dbmarker = [_style_for(lab, marker, "x") for lab in mydb]
dbcolor = [_style_for(lab, color, "k") for lab in mydb]


def _plot_clusters(title, labels, markers, colors):
    """Scatter X1 against X2 on the current axes, one call per cluster label."""
    x1 = mydata.X1.to_numpy()
    x2 = mydata.X2.to_numpy()
    for label in np.unique(labels):
        members = np.flatnonzero(labels == label)
        # All members of a label share one style; read it from the first.
        first = members[0]
        plt.scatter(x1[members], x2[members],
                    c=colors[first], marker=markers[first])
    plt.title(title)


plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
_plot_clusters("K-means cluster", k_label, kmarker, kcolor)
plt.subplot(1, 2, 2)
_plot_clusters("DBSCAN cluster", mydb, dbmarker, dbcolor)
plt.show()

# 2. Supervised learning -- the Y label is used.
# Convert the table to a plain array: columns 0-1 are used as the features
# and column 2 as the target.
mydata = np.array(mydata)

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible.
train_x, test_x, train_y, test_y = train_test_split(
    mydata[:, :2], mydata[:, 2], test_size=0.3, random_state=123
)
print(train_x.shape)
print(test_x.shape)
print(train_y.shape)
print(test_y.shape)
# Output recorded alongside the original code:
# (140, 2)
# (60, 2)
# (140,)
# (60,)

# Define the logistic-regression model, train it, and predict the test set.
logr = LogisticRegression()
logr.fit(train_x, train_y)
prey = logr.predict(test_x)

# Classification accuracy on the held-out data.  Printed explicitly because
# a bare expression shows nothing when run as a script (recorded value: 0.9).
accuracy = metrics.accuracy_score(test_y, prey)
print(accuracy)

plt.figure(figsize=(8, 6))
# Training samples, split by their true label.  Both masks must come from
# train_y; comparing the feature matrix against 1 would select rows by
# feature value instead of by class.
index0 = np.where(train_y == 0)[0]
index1 = np.where(train_y == 1)[0]
plt.plot(train_x[index0, 0], train_x[index0, 1], "o", label="train data class 0")
plt.plot(train_x[index1, 0], train_x[index1, 1], "s", label="train data class 1")
# Test samples, split by their predicted label.
index0 = np.where(prey == 0)[0]
index1 = np.where(prey == 1)[0]
plt.plot(test_x[index0, 0], test_x[index0, 1], "*", label="prey data class 0")
plt.plot(test_x[index1, 0], test_x[index1, 1], "d", label="prey data class 1")
# Circle the regions holding the misclassified samples.
# NOTE(review): these centres are hard-coded, presumably picked by eye for
# this dataset and split -- re-check them if either changes.
plt.plot(0.05, 0.4, "ko", markersize=80, markerfacecolor="none")
plt.plot(1, 0.15, "ko", markersize=80, markerfacecolor="none")
plt.legend(loc=0)
plt.show()