Python在机器学习中的应用——第一章 机器学习简介

Python在机器学习中的应用

#包
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.vq import kmeans2
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import os
# Data: read the CSV stored in the "data" folder of the current working directory.
data_dir = os.path.join(os.getcwd(), "data")
path = os.path.join(data_dir, "moonsdatas.csv")
mydata = pd.read_csv(path)
print(mydata.head())

# Data exploration: one figure with two side-by-side panels.
fig, (ax_raw, ax_labeled) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: the samples as unsupervised learning sees them (Y is not used).
ax_raw.plot(mydata["X1"], mydata["X2"], "ro")

# Right panel: the same samples, drawn separately for Y == 0 and Y == 1.
index0 = np.flatnonzero((mydata["Y"] == 0).to_numpy())
index1 = np.flatnonzero((mydata["Y"] == 1).to_numpy())
ax_labeled.plot(mydata["X1"][index0], mydata["X2"][index0], "r*")
ax_labeled.plot(mydata["X1"][index1], mydata["X2"][index1], "bs")

plt.show()

图:左图为全部样本(红色圆点,不区分类别);右图按标签 Y 区分类别(Y=0 为红色星号,Y=1 为蓝色方块)。

#包
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.vq import kmeans2
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import os
# Data: read the CSV stored in the "data" folder of the current working directory.
data_dir = os.path.join(os.getcwd(), "data")
path = os.path.join(data_dir, "moonsdatas.csv")
mydata = pd.read_csv(path)
print(mydata.head())

# Data exploration: one figure with two side-by-side panels.
fig, (ax_raw, ax_labeled) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: the samples as unsupervised learning sees them (Y is not used).
ax_raw.plot(mydata["X1"], mydata["X2"], "ro")

# Right panel: the same samples, drawn separately for Y == 0 and Y == 1.
index0 = np.flatnonzero((mydata["Y"] == 0).to_numpy())
index1 = np.flatnonzero((mydata["Y"] == 1).to_numpy())
ax_labeled.plot(mydata["X1"][index0], mydata["X2"][index0], "r*")
ax_labeled.plot(mydata["X1"][index1], mydata["X2"][index1], "bs")

plt.show()
# 1. Unsupervised learning -- the Y label is not used.
# Only the first two columns of the table are fed to the clustering algorithms.
features = mydata.iloc[:, :2]

# K-means clustering into two groups.
_, k_label = kmeans2(features, k=2, iter=100)

# Density-based clustering.
db = DBSCAN(eps=0.2, min_samples=5)
mydb = db.fit_predict(features)

marker = ["o", "s"]
color = ["r", "b"]


def _cluster_style(lab):
    """Return the (color, marker) pair used to draw cluster label ``lab``.

    Labels 0 and 1 keep the original red-circle / blue-square styles.
    DBSCAN marks noise samples with the label -1; indexing the style lists
    with -1 would silently draw noise as the last cluster, so noise gets its
    own black "x" style. Labels beyond the two predefined styles cycle
    through the color/marker combinations instead of raising IndexError.
    """
    if lab < 0:
        return "k", "x"
    n_colors = len(color)
    # Shifting the marker every full color cycle keeps labels 0..3 distinct.
    return color[lab % n_colors], marker[(lab + lab // n_colors) % len(marker)]


def _plot_clusters(labels, title):
    """Scatter X1 against X2 on the current axes, one call per cluster label."""
    labels = np.asarray(labels)
    for lab in np.unique(labels):
        members = labels == lab
        point_color, point_marker = _cluster_style(lab)
        # One scatter call per cluster instead of one per sample.
        plt.scatter(mydata.X1[members], mydata.X2[members],
                    c=point_color, marker=point_marker)
    plt.title(title)


plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
_plot_clusters(k_label, "K-means cluster")
plt.subplot(1, 2, 2)
_plot_clusters(mydb, "DBSCAN cluster")
plt.show()

图:左图为 K-means 聚类结果,右图为 DBSCAN 聚类结果(不同的聚类用不同的颜色和标记表示)。

#包
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.vq import kmeans2
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import os
# Data: read the CSV stored in the "data" folder of the current working directory.
data_dir = os.path.join(os.getcwd(), "data")
path = os.path.join(data_dir, "moonsdatas.csv")
mydata = pd.read_csv(path)
print(mydata.head())

# Data exploration: one figure with two side-by-side panels.
fig, (ax_raw, ax_labeled) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: the samples as unsupervised learning sees them (Y is not used).
ax_raw.plot(mydata["X1"], mydata["X2"], "ro")

# Right panel: the same samples, drawn separately for Y == 0 and Y == 1.
index0 = np.flatnonzero((mydata["Y"] == 0).to_numpy())
index1 = np.flatnonzero((mydata["Y"] == 1).to_numpy())
ax_labeled.plot(mydata["X1"][index0], mydata["X2"][index0], "r*")
ax_labeled.plot(mydata["X1"][index1], mydata["X2"][index1], "bs")

plt.show()
# 1. Unsupervised learning -- the Y label is not used.
# Only the first two columns of the table are fed to the clustering algorithms.
features = mydata.iloc[:, :2]

# K-means clustering into two groups.
_, k_label = kmeans2(features, k=2, iter=100)

# Density-based clustering.
db = DBSCAN(eps=0.2, min_samples=5)
mydb = db.fit_predict(features)

marker = ["o", "s"]
color = ["r", "b"]


def _cluster_style(lab):
    """Return the (color, marker) pair used to draw cluster label ``lab``.

    Labels 0 and 1 keep the original red-circle / blue-square styles.
    DBSCAN marks noise samples with the label -1; indexing the style lists
    with -1 would silently draw noise as the last cluster, so noise gets its
    own black "x" style. Labels beyond the two predefined styles cycle
    through the color/marker combinations instead of raising IndexError.
    """
    if lab < 0:
        return "k", "x"
    n_colors = len(color)
    # Shifting the marker every full color cycle keeps labels 0..3 distinct.
    return color[lab % n_colors], marker[(lab + lab // n_colors) % len(marker)]


def _plot_clusters(labels, title):
    """Scatter X1 against X2 on the current axes, one call per cluster label."""
    labels = np.asarray(labels)
    for lab in np.unique(labels):
        members = labels == lab
        point_color, point_marker = _cluster_style(lab)
        # One scatter call per cluster instead of one per sample.
        plt.scatter(mydata.X1[members], mydata.X2[members],
                    c=point_color, marker=point_marker)
    plt.title(title)


plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
_plot_clusters(k_label, "K-means cluster")
plt.subplot(1, 2, 2)
_plot_clusters(mydb, "DBSCAN cluster")
plt.show()
# 2. Supervised learning -- the Y label is used as the target.
# The table is converted to a plain array; columns 0-1 are passed as features
# and column 2 as the target (presumably X1, X2, Y in that order -- confirm
# against the CSV header).
mydata = np.array(mydata)
train_x, test_x, train_y, test_y = train_test_split(
    mydata[:, :2], mydata[:, 2], test_size=0.3, random_state=123
)
print(train_x.shape)
print(test_x.shape)
print(train_y.shape)
print(test_y.shape)
# Output recorded by the original author:
# (140, 2)
# (60, 2)
# (140,)
# (60,)

# Define the logistic regression model.
logr = LogisticRegression()
# Train.
logr.fit(train_x, train_y)
# Predict.
prey = logr.predict(test_x)
# Classification accuracy on the test split. It is printed because a bare
# expression shows nothing when the file runs as a script (the original
# author recorded 0.9).
accuracy = metrics.accuracy_score(test_y, prey)
print(accuracy)

plt.figure(figsize=(8, 6))
# Training samples, split by their true label. The class-1 selection must
# compare the label vector, not the feature matrix.
index0 = np.where(train_y == 0)[0]
index1 = np.where(train_y == 1)[0]
plt.plot(train_x[index0, 0], train_x[index0, 1], "o", label="train data class 0")
plt.plot(train_x[index1, 0], train_x[index1, 1], "s", label="train data class 1")
# Test samples, split by their predicted label.
index0 = np.where(prey == 0)[0]
index1 = np.where(prey == 1)[0]
plt.plot(test_x[index0, 0], test_x[index0, 1], "*", label="prey data class 0")
plt.plot(test_x[index1, 0], test_x[index1, 1], "d", label="prey data class 1")
# Circle the misclassified test samples. The positions come from the
# predictions themselves rather than from hard-coded coordinates, so the
# marks stay correct if the data or the model changes.
wrong = np.where(prey != test_y)[0]
plt.plot(test_x[wrong, 0], test_x[wrong, 1], "ko", markersize=12,
         markerfacecolor="none", label="misclassified")
plt.legend(loc=0)
plt.show()

图:训练集样本与测试集预测结果的散点图,并用圆圈标出识别错误的样本位置。

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值