import numpy as np
import urllib.request
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier on the first two features of the
# Pima Indians diabetes dataset and plot its decision regions together
# with the training points.

# URL of the dataset (comma-separated numeric values, no header row).
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"

# Download and parse the file.  The response is used as a context manager
# so the underlying connection is closed as soon as parsing is finished.
with urllib.request.urlopen(url) as raw_data:
    dataset = np.loadtxt(raw_data, delimiter=",")

# Step size of the mesh used to draw the decision regions.
h = .02

# Only the first two feature columns are used so that the decision surface
# can be drawn in 2-D; column 8 holds the class label.
X = dataset[:, :2]
y = dataset[:, 8]

model = LogisticRegression()
model.fit(X, y)

# Plot limits: min/max of each of the two feature columns, padded by 0.5.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5

# Regular grid covering the plot area, with spacing h (0.02) on both axes.
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Classify every grid point: ravel() flattens each coordinate matrix and
# np.c_ stacks the two flat vectors column-wise into an (n_points, 2) array.
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
# Back to the grid shape so the predictions can be drawn as filled contours.
Z = Z.reshape(xx.shape)

plt.figure(1, figsize=(4, 3))

# Filled contours paint each predicted-class region in its own colour
# (plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired) is an alternative).
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

# Plot also the training points, coloured by their true class label.
# (The original passed c='ry', which is not a valid colour specification
# and ignores the labels; c=y lets `cmap` map each class to a colour.)
plt.scatter(X[:, 0], X[:, 1], c=y, marker='o', edgecolors='k', cmap=plt.cm.Paired)

# Axis labels for the two plotted features.
# NOTE(review): names follow the published attribute list of the Pima
# dataset (column 0 / column 1) -- confirm against the data description.
plt.xlabel('Number of times pregnant')
plt.ylabel('Plasma glucose concentration')

plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
# Hide the tick marks on both axes.
plt.xticks(())
plt.yticks(())
plt.show()
# Program output: