# Reference: "Pandas DataFrame入门教程(图解版)" (Pandas DataFrame introductory tutorial, illustrated), biancheng.net: http://c.biancheng.net/pandas/dataframe.html
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
def cv1(csv_path='Iris.csv', test_size=0.2, random_state=None):
    """Train a decision tree on min-max-scaled features and print its accuracy.

    The CSV is read without a header row and is expected to hold five
    columns: four numeric measurements followed by the class label.
    The scaled features, the predictions for the held-out rows and the
    resulting accuracy are printed to stdout.

    Args:
        csv_path: Path of the CSV file to load.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed forwarded to the split and to the tree; pass an
            int for reproducible runs. ``None`` keeps the original
            non-deterministic behaviour.

    Returns:
        None.
    """
    feature_cols = ["花萼长度", "花萼宽度", "花瓣长度", "花瓣宽度"]
    label_col = "花类品质"

    # Load the data, naming the columns ourselves because the file has no header.
    dataframe = pd.read_csv(csv_path, header=None, names=feature_cols + [label_col])

    # Column-wise min-max scaling to [0, 1].
    # NOTE(review): the statistics come from the full data set, i.e. they are
    # computed before the train/test split below.
    features = dataframe[feature_cols]
    col_min = features.min()
    col_range = features.max() - col_min
    # A constant column has range 0; divide by 1 instead so it scales to 0
    # rather than to NaN.
    col_range = col_range.replace(0, 1)
    scaler_data = (features - col_min) / col_range
    print('归一化结果:', scaler_data)

    # Independent variables (scaled features) and dependent variable (label).
    x = scaler_data
    y = dataframe[label_col]

    # Hold out part of the rows as a test set.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )

    # Fit the decision tree and predict the held-out rows.
    dt = DecisionTreeClassifier(random_state=random_state)
    dt.fit(x_train, y_train)
    result = dt.predict(x_test)
    print(result)

    # accuracy_score is documented as (y_true, y_pred); keep that order.
    accuracy = accuracy_score(y_test, result)
    print('准确率:', accuracy)
import pandas as pd
from sklearn.naive_bayes import GaussianNB
if __name__ == '__main__':
    # Load the watermelon data set; read_excel already returns a DataFrame.
    data = pd.read_excel('watermelon.xls')

    # Integer code for every categorical label, keyed by column name.
    label_codes = {
        '好瓜': {'是': 1, '否': 0},
        '色泽': {'青绿': 1, '乌黑': 2, '浅白': 0},
        '根蒂': {'蜷缩': 1, '稍蜷': 2, '硬挺': 0},
        '敲声': {'浊响': 2, '沉闷': 1, '清脆': 0},
        '纹理': {'清晰': 0, '稍糊': 1, '模糊': 2},
        '脐部': {'凹陷': 0, '稍凹': 1, '平坦': 2},
        '触感': {'硬滑': 1, '软粘': 0},
    }

    # Replace each label column by its integer codes.
    for column, codes in label_codes.items():
        encoded = data[column].map(codes)
        # Series.map leaves NaN for labels missing from the dict; fail here
        # with a clear message instead of handing NaN to the classifier.
        if encoded.isna().any():
            unknown = sorted(set(data.loc[encoded.isna(), column].astype(str)))
            raise ValueError(
                f"column {column!r} contains labels with no code: {unknown}"
            )
        data[column] = encoded

    # Training data: columns 1-6 are the features, column 7 is the target.
    # NOTE(review): selection is positional; presumably column 0 is a row id
    # and the feature columns follow the order of label_codes - confirm
    # against the spreadsheet.
    X_train = data.iloc[:, 1:7].values
    Y_train = data.iloc[:, 7].values
    print(X_train)
    print(Y_train)

    # Single already-encoded sample to classify (same column order as X_train).
    X_test = [[1, 1, 2, 0, 0, 1]]

    # Gaussian naive Bayes: models each feature with a normal distribution.
    classifier = GaussianNB()
    classifier.fit(X_train, Y_train)

    # predict returns one label per sample; read the only one explicitly
    # rather than relying on the truthiness of a one-element array.
    result = classifier.predict(X_test)

    # 1 means good melon, 0 means bad melon.
    if result[0] == 0:
        print("坏瓜")
    else:
        print("好瓜")