# -*- coding: utf-8 -*-
"""
Created on Fri Oct 12 16:56:56 2018
@author: fengjuan
"""
import pandas as pd
import numpy as np
# Import pyplot from the matplotlib toolkit under the alias plt
#import matplotlib.pyplot as plt
#df_train.info()
# Feature names for the data set; the file at the URL ships without a header row.
# NOTE(review): 'Bare Nulclei' and 'Nomal Nucleoli' look misspelled, but they are
# runtime column labels and are reproduced exactly as in the original.
column_names = [
    'Sample code number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nulclei',
    'Bland Chromatin',
    'Nomal Nucleoli',
    'Mitoses',
    'Class',
]
# Read the raw data from the web; the column labels are supplied explicitly
# through `names`.
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data',
    names=column_names,
)
# Replace every '?' placeholder in the data with the standard missing-value
# marker (NaN).
data = data.replace(to_replace='?', value=np.nan)