daimadiamadaima

最新推荐文章于 2024-07-20 09:51:47 发布

翘指

最新推荐文章于 2024-07-20 09:51:47 发布

阅读量84

点赞数

文章标签： python 开发语言

本文链接：https://blog.csdn.net/m0_65334415/article/details/139637546

版权

# Simple (one-variable) linear regression on the iris data set:
# fit petal width as a linear function of petal length, plot the fitted
# line together with the residual of every sample, and print the model.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression

# NOTE: inside a Jupyter notebook run the IPython magic `%matplotlib inline`
# before this cell; the magic is not valid syntax in a plain .py file.

iris = load_iris()  # load the iris data set
data = pd.DataFrame(iris.data)
data.columns = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width']

# Fit a one-variable linear regression with scikit-learn.
# Estimators expect a 2-D feature matrix, hence the reshape to one column.
x = data['petal-length'].values.reshape(-1, 1)
y = data['petal-width'].values.reshape(-1, 1)

clf = LinearRegression()
clf.fit(x, y)
pre = clf.predict(x)

plt.scatter(x, y, s=50)
plt.plot(x, pre, 'r-', linewidth=2)
plt.xlabel('petal-length')
plt.ylabel('petal-width')
# Draw one vertical green segment per sample from the observed value to the
# predicted value, i.e. the residual of that sample.
for x_i, y_i, pre_i in zip(x[:, 0], y[:, 0], pre[:, 0]):
    plt.plot([x_i, x_i], [y_i, pre_i], 'g-')
plt.show()

print("系数:", clf.coef_)
print("截距:", clf.intercept_)
# Predict the petal width of a flower whose petal length is 3.9.
print('预测值为:', clf.predict([[3.9]]))

# Logistic-regression classifier on the iris data set with standardised
# features; prints the accuracy on the held-out test split.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Load the data set once and reuse it for both features and labels.
iris = load_iris()
x = iris.data
y = iris.target

# Split into training and test sets, then standardise the features.
# NOTE(review): the test-set fraction was unreadable in the original
# ("test size = 0.random_state"); 0.3 is assumed here - confirm.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0)

sc = StandardScaler()
# Fit the scaler on the training data only and reuse its statistics for the
# test data, so no information from the test set leaks into training.
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Train the logistic-regression model and predict the test set.
classifier = LogisticRegression(random_state=0)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# Use the estimator's own score() to get the accuracy on the test set.
print('Accuracy of LR Classifier:% .3f' % classifier.score(x_test, y_test))

# Decision-tree classifier on the iris data set: train, draw the fitted
# tree, classify one hand-written sample and report the test accuracy.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn import tree
from sklearn.model_selection import train_test_split

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.20, random_state=20)

clf = tree.DecisionTreeClassifier()  # criterion defaults to 'gini'
clf = clf.fit(X_train, y_train)

plt.figure(dpi=200)
# feature_names / class_names set the labels shown inside the tree nodes.
tree.plot_tree(clf, feature_names=iris.feature_names,
               class_names=iris.target_names)

# Predict the class of the sample [6, 5, 5, 2].
print('数据[6,5,5,2]的类别:', clf.predict([[6, 5, 5, 2]]))
print('测试集的标签:\n', y_test)
print('模型准确率为:', "{0:.3f}".format(clf.score(X_test, y_test)))

# Show the tree figure when run as a script (notebooks render it inline).
plt.show()

# k-nearest-neighbours classifier on the first two iris features, with the
# decision regions drawn as a coloured background behind the samples.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris

iris = load_iris()
X = iris.data[:, :2]  # keep only the first two feature columns for a 2-D plot
Y = iris.target
print(iris.feature_names)

cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])  # regions
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])   # samples

clf = KNeighborsClassifier(n_neighbors=10, weights='uniform')
clf.fit(X, Y)

# Draw the decision boundary: classify every node of a dense grid that
# covers the data range with a margin of 1 on each side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
# Overlay the samples, coloured by their true class.
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title('3_Class(k = 10, weights = uniform)')
plt.show()

# Linear support-vector classifier on the iris data set: prints training
# accuracy, test accuracy and the per-class classification report.
import numpy as np
from sklearn import svm
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
import matplotlib.pyplot as plt

iris = datasets.load_iris()
x, y = iris.data, iris.target

x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, random_state=1, test_size=0.2)

# gamma is passed as in the original; it is ignored by the linear kernel.
classifier = svm.SVC(kernel='linear', gamma=0.1,
                     decision_function_shape='ovo', C=0.1)
classifier.fit(x_train, y_train.ravel())

print("SVM-输出训练集的准确率为:", classifier.score(x_train, y_train))
print("SVM-输出测试集的准确率为:", classifier.score(x_test, y_test))

y_hat = classifier.predict(x_test)
classreport = metrics.classification_report(y_test, y_hat)
# The original computed the report but never displayed it.
print(classreport)

# Gaussian naive Bayes on the iris data set: fit on all samples and count
# how many of those same samples the model mislabels.
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB

iris = load_iris()
clf = GaussianNB()  # set up the classifier
clf.fit(iris.data, iris.target)
y_pred = clf.predict(iris.data)

# Both placeholders must be %-directives; the original "$d" raised
# "TypeError: not all arguments converted during string formatting".
print("Number of mislabeled points out of %d points: % d"
      % (iris.data.shape[0], (iris.target != y_pred).sum()))

# K-Means clustering of the iris samples into three clusters.
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans

iris = load_iris()  # load the data set
X = iris.data

estimator = KMeans(n_clusters=3)  # build the K-Means clustering model
estimator.fit(X)                  # cluster the data

# Fitted attributes of scikit-learn estimators carry a trailing underscore.
label_pred = estimator.labels_    # cluster label of every sample
print(label_pred)                 # show the cluster each sample belongs to

# Agglomerative (hierarchical) clustering of three synthetic Gaussian blobs,
# plotted with one colour per recovered cluster.
# make_blobs is imported from sklearn.datasets; the old
# sklearn.datasets.samples_generator module no longer exists.
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle

# Centres of the random blobs.
centers = [[1, 1], [-1, -1], [1, -1]]
# Number of samples to generate.
n_samples = 3000
# Generate the data.
X, labels_true = make_blobs(n_samples=n_samples, centers=centers,
                            cluster_std=0.6, random_state=0)

# Configure the hierarchical clustering; linkages[2] selects 'complete'.
linkages = ['ward', 'average', 'complete']
n_clusters_ = 3
ac = AgglomerativeClustering(linkage=linkages[2], n_clusters=n_clusters_)

# Fit the model.
ac.fit(X)
# Cluster label of every sample.
labels = ac.labels_

# Plot.
plt.figure(1)
plt.clf()
colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k, col in zip(range(n_clusters_), colors):
    # Boolean mask that is True where the sample belongs to cluster k.
    my_members = labels == k
    # X[my_members, 0] / X[my_members, 1] are the x / y coordinates of the
    # samples selected by the mask; ',' is matplotlib's pixel marker.
    plt.plot(X[my_members, 0], X[my_members, 1], col + ',')
plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()

import random

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
def findNeighbor(j, X, eps):
    """Return the indices of all points within distance ``eps`` of point ``j``.

    Args:
        j: Row index of the query point in ``X``.
        X: Array-like of points, one point per row.
        eps: Neighbourhood radius; the comparison is inclusive (``<= eps``).

    Returns:
        A list of row indices in ascending order. Point ``j`` is always
        included, because its distance to itself is 0.
    """
    points = np.asarray(X)
    if points.ndim == 1:
        # Treat a flat sequence as one-dimensional points.
        points = points[:, np.newaxis]
    # Euclidean distance from point j to every point, computed in one
    # vectorised pass instead of one Python-level iteration per point.
    squared = np.square(points - points[j])
    distances = np.sqrt(np.sum(squared, axis=tuple(range(1, points.ndim))))
    return [p for p, dist in enumerate(distances) if dist <= eps]
def dbscan(X, eps, min_Pts):
    """Cluster the rows of ``X`` with the DBSCAN algorithm.

    Args:
        X: Array of points, one point per row.
        eps: Neighbourhood radius handed to ``findNeighbor``.
        min_Pts: Minimum number of neighbours (the point itself included)
            for a point to count as a core point.

    Returns:
        A list with one label per row of ``X``: cluster ids ``0, 1, ...``
        in order of discovery, or ``-1`` for noise.
    """
    n_points = len(X)
    cluster = [-1] * n_points      # -1 marks "noise / not assigned yet"
    visited = [False] * n_points

    # Visit the points in random order. A shuffled sequence that skips
    # visited entries picks uniformly among the unvisited points, without
    # the O(n) list.remove() per pick.
    order = list(range(n_points))
    random.shuffle(order)

    k = -1
    for j in order:
        if visited[j]:
            continue
        visited[j] = True
        NeighborPts = findNeighbor(j, X, eps)
        if len(NeighborPts) < min_Pts:
            # Not a core point: stays noise unless a cluster expanded later
            # reaches it as a border point.
            continue

        # j is a core point: open a new cluster and grow it.
        k += 1
        cluster[j] = k
        pending = list(NeighborPts)  # points still to be processed
        queued = set(NeighborPts)    # O(1) membership test for `pending`
        while pending:
            i = pending.pop()
            if not visited[i]:
                visited[i] = True
                Ner_NeighborPts = findNeighbor(i, X, eps)
                if len(Ner_NeighborPts) >= min_Pts:
                    # i is itself a core point, so its neighbourhood is
                    # density-reachable and joins the cluster.
                    for a in Ner_NeighborPts:
                        if a not in queued:
                            queued.add(a)
                            pending.append(a)
            # Claim the point if no cluster owns it yet. This also covers
            # points visited earlier and provisionally left as noise, which
            # are border points of this cluster.
            if cluster[i] == -1:
                cluster[i] = k
    return cluster
# Demo: two noisy concentric circles plus one compact blob, clustered with
# the dbscan() function defined above and plotted with one colour per label.
X1, y1 = datasets.make_circles(n_samples=1000, factor=.6, noise=.05)
X2, y2 = datasets.make_blobs(n_samples=300, n_features=2,
                             centers=[[1.2, 1.2]], cluster_std=[.1],
                             random_state=9)
X = np.concatenate((X1, X2))

eps = 0.08    # neighbourhood radius
min_Pts = 10  # minimum neighbourhood size for a core point
C = dbscan(X, eps, min_Pts)

plt.figure(figsize=(12, 9), dpi=80)
plt.scatter(X[:, 0], X[:, 1], c=C)
plt.show()

# PCA of the iris data set: project the samples onto the first two principal
# components and draw one scatter series per class.
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris

data = load_iris()
y = data.target
x = data.data

pca = PCA(n_components=2)
reduced_x = pca.fit_transform(x)

# One (label, colour, marker) triple per iris class. A boolean mask selects
# the rows of each class, replacing the three hand-built coordinate lists.
for label, colour, marker in ((0, 'r', 'x'), (1, 'b', 'D'), (2, 'g', '.')):
    members = reduced_x[y == label]
    plt.scatter(members[:, 0], members[:, 1], c=colour, marker=marker)
plt.show()

翘指

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
daimadiamadaima

import pandas as pdimport matplotlib. pyplot as pltfrom sklearn. datasets import load_irisfrom sklearn.linear_model import LinearRegression% matplotlibinlineiris = load_iris()#导人数据集irisdata = pd. DataFrame(iris. data)data.columns =[ 'sepal -length'
复制链接

扫一扫