- 随机数据
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
sns.set()
# sns.set(style = 'whitegrid')
# sns.set_style('whitegrid')
% matplotlib inline
# Import from the public package path: the private module
# ``sklearn.datasets.samples_generator`` was deprecated in scikit-learn 0.22
# and removed in 0.24, while ``sklearn.datasets`` works on old and new releases.
from sklearn.datasets import make_blobs

# 50 labelled samples drawn around 2 centers; the fixed seed keeps the
# generated data (and therefore the figure) reproducible.
X, y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.6)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn', edgecolor='black')

# Mark the single point (0.6, 2.1) with a red cross.
plt.plot([0.6], [2.1], 'x', color='r', markeredgewidth=2, markersize=10)

# Draw three candidate straight lines, each given as (slope, intercept).
xfit = np.linspace(-1, 3.5)
for m, b in [(1, 0.65), (0.5, 1.6), (-0.2, 2.9)]:
    plt.plot(xfit, m * xfit + b, 'k-')

# Clamp the x-axis to the range covered by xfit.
plt.xlim(-1, 3.5)
哪个分割线好呢?
- Support Vector Machines: 通道最大化
# Scatter the labelled samples, then draw each candidate line together with
# a translucent grey band of fixed vertical half-width around it.
xfit = np.linspace(-1, 3.5)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn', edgecolor='k')

# Each tuple is (slope, intercept, vertical half-width of the shaded band).
for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
    yfit = m * xfit + b
    band_lower = yfit - d
    band_upper = yfit + d
    plt.plot(xfit, yfit, 'k-')
    plt.fill_between(
        xfit,
        band_lower,
        band_upper,
        edgecolor='none',
        color='#AAAAAA',
        alpha=0.4,
    )

# Clamp the x-axis to the range covered by xfit.
plt.xlim(-1, 3.5)
SVM
from sklearn.svm import SVC  # support vector classifier

# Build a classifier with a linear kernel (all other hyper-parameters are
# left at the library defaults) and train it on the samples X with labels y.
# fit() stays the final statement so a notebook cell still echoes the
# fitted estimator.
model = SVC(kernel="linear")
model.fit(X, y)
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma='auto_deprecated', kernel='linear', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
def plot_svc_decision_function(model, ax=None, plot_support=True):
if ax is None:
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
x = np.linspace(xlim[0], xlim[1