import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
from matplotlib.font_manager import FontProperties
# Module-level font handle built from a local TTF file at size 14.
# NOTE(review): judging by the file name this is a CJK typeface, presumably
# used for Chinese plot labels — confirm against the plotting code.
# A raw string keeps the Windows backslashes literal; the resulting path is
# identical to the original literal but no longer relies on invalid escape
# sequences such as "\F".
font = FontProperties(fname=r"E:\Fonts\方正粗黑宋简体.ttf", size=14)
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib as mpl
import seaborn as sns
from sklearn import tree,metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
import pydotplus
import os
import graphviz
# from sklearn.externals.six import StringIO
import io
from IPython.display import Image
def Read_data(path) -> pd.DataFrame:
    """Read a CSV file (training or test set) into a DataFrame.

    Args:
        path: Location of the CSV file; forwarded unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for ``path``.
    """
    # engine='python' selects pandas' pure-Python parser, as in the original.
    data = pd.read_csv(path, engine='python')
    return data
defmain1():
path1 = os.path.join(os.getcwd(),"data","Titanic_train.csv")
path2 = os.path.join(os.getcwd(),"data","Titanic_test.csv")
paths =[path1, path2]
datas =[]for path in paths:
datas.append(Read_data(path))
train = datas[0]
test = datas[1]
data_pro=datas
print("训练集的列缺失值得情况:\n",train.isnull().sum())print("测试集的列缺失值得情况:\n", test.isnull
Python在机器学习–决策树和集成学习import numpy as npimport pandas as pdimport matplotlib.pyplot as pltimport scipy as spfrom matplotlib.font_manager import FontPropertiesfont=FontProperties(fname="E:\Fonts\方正粗黑宋简体.ttf",size=14)from mpl_toolkits.mplot3d import Axe