AI决策树

# Import required libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import matplotlib.pyplot as plt

# Load the iris dataset (150 samples, 4 features, 3 classes)
iris = load_iris()
x = iris.data
y = iris.target

# Split into train/test sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Create the decision-tree classifier (default parameters)
clf = DecisionTreeClassifier()

# Fit the model on the training data
clf.fit(X_train, y_train)

# Evaluate accuracy on the held-out test set
print("Model Accuracy: ", clf.score(X_test, y_test))

# Visualize the fitted tree
# bug fix: the original line ended with a stray markdown fence (```)
# left over from the blog post, which is a Python syntax error
fig, ax = plt.subplots(figsize=(12, 12))
tree.plot_tree(clf, filled=True)
plt.show()
![结果](https://img-blog.csdnimg.cn/direct/66a0388312f3478da871684da5bac72d.png)


#绘制iris的2d图
%matplotlib inline
import matplotlib.pyplot as plt
X_sepal = x[:, :2]
plt.scatter(X_sepal[:, 0], X_sepal[:, 1], c=y, cmap=plt.cm.gnuplot)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

# Scatter plot of the two petal features (columns 2-3).
# bug fix: this snippet was wrapped in a mislabeled ```bash markdown fence
# from the blog post; the fence markers were embedded in the source and
# would be Python syntax errors — removed.
X_petal = x[:, 2:4]
plt.scatter(X_petal[:, 0], X_petal[:, 1], c=y, cmap=plt.cm.gnuplot)
plt.xlabel('Petal length')
plt.ylabel('Petal width')

from sklearn.neighbors import KNeighborsClassifier

# Instantiate a 1-nearest-neighbor classifier
knn = KNeighborsClassifier(n_neighbors=1)
print(knn)

# Fit (train) the model on the full dataset
knn.fit(x, y)

# Predict the class of a single new observation.
# bug fix: predict() requires a 2-D array of shape (n_samples, n_features);
# the original passed a flat list [3, 5, 4, 2], which raises ValueError.
knn.predict([[3, 5, 4, 2]])

# Predict several new observations at once
X_new = [[3, 5, 4, 2], [5, 4, 3, 2]]
knn.predict(X_new)

# Same workflow with a different K value
knn5 = KNeighborsClassifier(n_neighbors=5)
knn5.fit(x, y)
knn5.predict(X_new)

# Same workflow with a different model family
# import the class
from sklearn.linear_model import LogisticRegression

# instantiate the model (using the default parameters)
logreg = LogisticRegression()

# fit the model with data
logreg.fit(x, y)

# predict the response for new observations
logreg.predict(X_new)

# Project the iris data onto its first three principal components and
# draw a 3-D scatter plot with each species labeled at its centroid.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import decomposition
from sklearn import datasets

np.random.seed(5)

# Load the iris dataset
iris = datasets.load_iris()
features = iris.data
labels = iris.target

fig = plt.figure(1, figsize=(4, 3))
plt.clf()
ax = fig.add_subplot(111, projection="3d", elev=48, azim=134)
plt.cla()

# Reduce the 4-D feature space to 3 principal components
reducer = decomposition.PCA(n_components=3)
reducer.fit(features)
components = reducer.transform(features)

# Place each species name at the mean position of its projected samples
for species, class_id in [("Setosa", 0), ("Versicolour", 1), ("Virginica", 2)]:
    mask = labels == class_id
    ax.text3D(
        components[mask, 0].mean(),
        components[mask, 1].mean() + 1.5,
        components[mask, 2].mean(),
        species,
        horizontalalignment="center",
        bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
    )

# Reorder the labels to have colors matching the cluster results
labels = np.choose(labels, [1, 2, 0]).astype(float)
ax.scatter(
    components[:, 0],
    components[:, 1],
    components[:, 2],
    c=labels,
    cmap=plt.cm.nipy_spectral,
    edgecolor="k",
)
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])
plt.show()

![结果](https://img-blog.csdnimg.cn/direct/533347d843494df0bcfa9a3a6166c080.png)



#其他  无监督时学习   异常检测实战summary
import pands as pd
import numpy as np 
data=pd.read_cvs("文件地址")
data.head()
%matplotlib inline
from matplotlib import pyplot as plt
fig1=plt.figure(figsize=(10,5))
plt.scatter(data.loc[:,"x1"],data.loc[:,"x2"])
plt.title("data")
plt.xlable("x1")
plt.ylable("x2")
plt.show()
x1=data.loc[:,"x1"]
x2=data.loc[:,"x2"]
fig2=plt.figure(figsize=(20,5))
plt.subplot(121)
plt.hist(x1.bins=100)
plt.xlable("x1")
plt.ylable("counts")
plt.title("x1 distribution")
plt.subplot(122)
plt.hist(x1.bins=100)
plt.xlable("x2")
plt.ylable("counts")
plt.title("x2 distribution")
plt.show()

x1_mean=x1.mean()
x1_sigma=x1.std()
x2_mean=x2.mean()
x2_sigma=x2.std()
from scipy.stats import norm
x1_range=np.linspace(0,20,300)
x1_normal=norm.pdf(x1_range,x1_mean,x1_sigma)
x2_range=np.linspace(0,20,300)
x2_normal=norm.pdf(x2_range,x2_mean,x2_sigma)
fig2=plt.figure(figsize=(10,5))
plt.subplot(121)
plt.plt(x1_range,x1_normal)
plt.title("normal p(x1)")
plt.subplot(122)
plt.plt(x2_range,x2_normal)
plt.title("normal p(x2)")
plt.show()

#建立模型和分值概率
from sklearn.covariance import EllipticEnvelope()
ad_modle=EllipticEnvelope()
ad_modle.fit(data)
y_predict=ad_modle.predict(data)
print(y_predict,pd.value_counts(y_predict))

fig4=plt.figure(figsize=(20,10))
orginal_data=plt.scatter(data.loc[:,"x1"],data.loc[:,"x2"],marker='x')
anpmaly_data=plt.scatter(data.loc[:,"x1"][y_predict==1],data.loc[:,"x2"][y_predict==-1],marker='o',facecolor='none',edgecolor='red',s=150)
plt.title("anpmaly delection result")
plt.xlable("x1")
plt.ylable("x2")
plt.legend((orginal_data,anpmaly_data),("orginal_data","anpmaly_data"))
plt.show()


ad_modle=EllipticEnvelope(contamination=0.02)
ad_modle.fit(data)
#其他代码copy过来看区别,分散开了

#task:基于数据建立knn模型实现分类(n_neighbors=3)
#数据载入
import pandas as pd
import numpy as np 
data=pd.read_cvs("文件路径")
data.head()
x=data.droup(["target",'lable'],axix=1)
y=data.loc[:,'lable']
x.head()
y.head()
#建立kn模型
from sklearn.neighbors import KNeighborsClassfier
KNN=KNeighborsClassfier(n_neighbors=3)
KNN.fit(x,y)
y_predict=knn.predict(x)
from sklearn.metrics import accuracy_score
accuracy=accuracy_score(y,y_predict)
print(accuracy)
#引入异常
from sklearn.preprocessing import StandardScaler
x_norm=StandardScaler().fit_transform(x)
print(x_norm)
#计算mean
x1_mean=x.loc[:,'sepal length'].mean()
x1_norm_mean=x_norm[:,0].mean()
x1_sigma=x.loc[:,'sepal length'].std()
x1_norm_sigma=x_norm[:,0].std()
%matplotlib inline
from matplotlib import pyplot as plt
fig=plt.figure(figsize=(10,10))
plt.hist(x.loc[:,"sepal length"],bins=100)
plt.show()
#处理好后
fig1=plt.figure(figsize=(10,10))
plt.subplot(121)
plt.hist(x.loc[:,"sepal length"],bins=100)
plt.subplot(122)
plt.hist(x_norm.loc[:,0],bins=100)
plt.show()

#维度
print(x.shape)
#pca
from sklearn.decomposition import PCA
pca=PCA(n_components=4)
x_pca=pca.fit_transfrom(x_norm)
var_ratio=pca.explained_variance_ratio_
print(var_ratio)
fig2=plt.figure(figsize=(20,5))
plt.bar(['1',"2","3","4"],var_ratio)
plt.xticks(['1',"2","3","4"],['pc1',"pc2","pc3","pc4"])
plt.ylable("pc vs var_ratio")
plt.show()


pca=PCA(n_components=2)
x_pca=pca.fit_transfrom(x_norm)
x_pca.shape

fig2=plt.figure(figsize=(10,10))
settos=plt.scatter(x_pca.loc[:,0][y==0],x_pca.loc[:,1][y==0])
ver=plt.scatter(x_pca.loc[:,0][y==1],x_pca.loc[:,1][y==1])
vir=plt.scatter(x_pca.loc[:,0][y==2],x_pca.loc[:,1][y==2])
plt.title("data")
plt.xlable("x1")
plt.ylable("x2")
plt.legend((settos,ver,vir),('settos','ver','vir'))
plt.show()

KNN=KNeighborsClassfier(n_neighbors=3)
KNN.fit(x_pca,y)
y_predict=knn.predict(x_pca)
from sklearn.metrics import accuracy_score
accuracy=accuracy_score(y,y_predict)
print(accuracy)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值