机器学习初步
# Train a decision tree on the full music data set, then predict the genre
# for two new samples.
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

music_data = pd.read_csv('music.csv')

# Every column except 'genre' is an input feature; 'genre' is the label.
X = music_data.drop(columns=['genre'])
y = music_data['genre']

model = DecisionTreeClassifier()
model.fit(X, y)

# The model was fitted on a DataFrame, so it remembers the column names.
# Passing a bare nested list to predict() makes recent scikit-learn versions
# warn that X has no valid feature names, and silently relies on the values
# being in the same order as the training columns. Reusing X.columns makes
# that pairing explicit.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(samples)

# Bare expression: shows the result when run as a notebook/REPL cell.
predictions  # array(['HipHop', 'Dance'], dtype=object)
Calculating the Accuracy
# Estimate the model's accuracy by training on 80% of the rows and scoring
# the predictions for the remaining 20%.
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

music_data = pd.read_csv('music.csv')

# 'genre' is the label; all remaining columns are input features.
y = music_data['genre']
X = music_data.drop(columns=['genre'])

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Create the model and train it on the training portion only.
model = DecisionTreeClassifier().fit(X_train, y_train)

# Compare the predicted genres for the held-out rows with the true ones.
predictions = model.predict(X_test)
score = accuracy_score(y_test, predictions)

# No random seed is fixed, so each run may give a different value
# between 0 and 1.
score
Persisting Models
# Reuse a model that was persisted to disk instead of retraining it.
import joblib
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# The training steps are deliberately disabled; the model is loaded instead.
# music_data = pd.read_csv('music.csv')
# X = music_data.drop(columns=['genre'])
# y = music_data['genre']
# model = DecisionTreeClassifier()
# model.fit(X, y)

# NOTE(review): joblib.load unpickles the file, so only load files from a
# trusted source.
model = joblib.load('music_recommdender.joblib')

# predict() expects a batch, so the single sample is wrapped in an outer list.
single_sample_batch = [[21, 1]]
predictions = model.predict(single_sample_batch)

# Bare expression: shows the result when run as a notebook/REPL cell.
predictions  # array(['HipHop'], dtype=object)
Visualizing a Decision Tree
# Train a decision tree and export it as a Graphviz .dot file for viewing.
import pandas as pd
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier

music_data = pd.read_csv('music.csv')

# 'genre' is the label; all remaining columns are input features.
y = music_data['genre']
X = music_data.drop(columns=['genre'])

model = DecisionTreeClassifier()
model.fit(X, y)

# Age and gender are the features shown in the tree's split conditions.
feature_labels = ['age', 'gender']

# Unique genres, sorted, so every node can display the genre it represents.
genre_labels = sorted(y.unique())

tree.export_graphviz(
    model,
    out_file='music-recommender.dot',
    feature_names=feature_labels,
    class_names=genre_labels,
    label='all',   # every node carries human-readable labels
    rounded=True,  # draw node boxes with rounded corners
    filled=True,   # fill each node box with a color
)
将 'music-recommender.dot' 拖拽到 VS Code 中,安装相应的 Graphviz(DOT)预览插件,然后使用 preview 功能查看生成的决策树。