机器学习
完整代码
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the dataset from CSV.
df = pd.read_csv('G:/liu/python/music.csv')

# Separate the target column (Y) from the remaining input columns (X).
Y = df['genre']
X = df.drop(columns='genre')

# Hold out 20% of the rows for testing; train on the other 80%.
X_tr, X_te, Y_tr, Y_te = train_test_split(X, Y, test_size=0.2)

# Build the decision tree and fit it on the training split.
model = DecisionTreeClassifier()
model.fit(X_tr, Y_tr)

# Predict on the held-out inputs and score the predictions against Y_te.
pre = model.predict(X_te)
score = accuracy_score(y_true=Y_te, y_pred=pre)
print(score)
一、导入 Python 库
import pandas as pd # 数据分析库
from sklearn.tree import DecisionTreeClassifier # sklearn 算法,决策算法
from sklearn.model_selection import train_test_split #模型训练和测试
from sklearn.metrics import accuracy_score #模型的准确度
二、导入数据
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Read the music dataset from CSV into a DataFrame.
df = pd.read_csv('G:/liu/python/music.csv')
三、数据处理
# Target column the model should learn to predict.
Y = df['genre']
# Every remaining column is used as an input feature.
X = df.drop(columns='genre')
# Keep 20% of the rows for testing and train on the other 80%.
X_tr, X_te, Y_tr, Y_te = train_test_split(X, Y, test_size=0.2)
四、模型建立
# Create a decision tree classifier with default settings, then fit it
# on the training inputs (X_tr) and their known labels (Y_tr).
model = DecisionTreeClassifier()
model.fit(X_tr, Y_tr)
五、模型测试
# Predict a label for every row of the held-out test inputs.
pre = model.predict(X_te)
六、模型准确度
# Compare the predictions with the true test labels; with default settings
# accuracy_score returns the fraction of matching labels.
score = accuracy_score(y_true=Y_te, y_pred=pre)
模型保存和加载
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23, so use the standalone `joblib` package and only fall back to the
# vendored copy on old installations that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Load the dataset and split it into input features (X) and target labels (Y).
df = pd.read_csv('G:/liu/python/music.csv')
X = df.drop(columns=['genre'])
Y = df['genre']

# Train on the full dataset; no train/test split is made in this snippet.
model = DecisionTreeClassifier()
model.fit(X, Y)

# Persist the trained model to disk so it can be reloaded without retraining.
joblib.dump(model, 'G:/liu/python/music.joblib')
模型的加载和使用
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23, so use the standalone `joblib` package and only fall back to the
# vendored copy on old installations that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Load the trained model from disk. joblib files are pickle-based, so only
# load files that come from a trusted source.
model = joblib.load('G:/liu/python/music.joblib')

# Predict for two samples; each inner list is one row of feature values,
# given in the same column order the model was trained with.
pre = model.predict([[21, 1], [22, 0]])
print(pre)
导出模型成可视化dot文件
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
# Load the dataset and train a decision tree on all rows.
df = pd.read_csv('G:/liu/python/music.csv')
Y = df['genre']
X = df.drop(columns=['genre'])
model = DecisionTreeClassifier()
model.fit(X, Y)

# Write the fitted tree to a Graphviz .dot file and set its display options.
tree.export_graphviz(
    model,
    out_file='G:/liu/python/music.dot',
    feature_names=['age', 'gender'],
    class_names=sorted(Y.unique()),
    label='all',
    rounded=True,
    filled=True,
)