1.利用学习曲线判断
2.误差 = 偏差²(准确度) + 方差(稳定性) + 不可约噪声
3.下图中虚线为训练集,实线为测试集
'''
功能:判别过拟合和欠拟合
学习曲线Learning Curve:评估样本量和指标的关系
验证曲线validation Curve:评估参数和指标的关系
'''
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from sklearn.learning_curve import learning_curve
import numpy as np
from sklearn.learning_curve import validation_curve
# Load the data.
# The file has no header row, so pandas labels the columns 0, 1, 2, ...
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None)

# Columns 2 onward form the feature matrix; column 1 holds the class label
# ('M' / 'B' -- malignant / benign per the WDBC dataset description).
X = df.loc[:, 2:].values
y = df.loc[:, 1].values

# Convert the string class labels to integers.
le = LabelEncoder()
y = le.fit_transform(y)

# Show which integer codes were assigned to 'M' and 'B'.
# The original line was missing its closing parenthesis (SyntaxError).
print(le.transform(['M', 'B']))