import pandas as pd
import numpy as np
# Decision-tree classifier
from sklearn.tree import DecisionTreeClassifier
# Feature extraction from dictionaries
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier


def Demo_predict(csv_path='2005年.csv'):
    """Train a decision-tree classifier on the logistics CSV and print its test accuracy.

    The function reads the CSV, splits it 70/30 into train/test sets with a
    fixed random seed, vectorizes the feature rows with ``DictVectorizer``,
    fits a ``DecisionTreeClassifier`` and prints the intermediate data, the
    predictions and the accuracy score.

    Args:
        csv_path: Path of the CSV file to load. It must contain the feature
            columns "GDP", "物流消费总额", "固定资产投资", "进口总额",
            "出口总额" and the target column "社会物流总额".

    Returns:
        None. All results are written to standard output.
    """
    # Select the feature columns and the target column.
    data = pd.read_csv(csv_path)
    x = data[["GDP", "物流消费总额", "固定资产投资", "进口总额", "出口总额"]]
    y = data[["社会物流总额"]]

    # Split the data set (fixed seed so the split is reproducible).
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, random_state=42, test_size=0.3
    )
    print("x_test_old测试集:", x_test)
    print("y_test_old测试集:", y_test)

    # Feature engineering: turn each row into a dict and vectorize it.
    transfer = DictVectorizer()
    train_records = x_train.to_dict(orient="records")
    print(train_records)
    test_records = x_test.to_dict(orient="records")
    print(test_records)
    x_train_vec = transfer.fit_transform(train_records)
    # Only transform the test set: it must reuse the feature mapping learned
    # from the training set. Re-fitting here would let the test data define
    # its own column layout, which may not match what the model was trained on.
    x_test_vec = transfer.transform(test_records)
    print("x_train:", x_train_vec)
    print("x_test:", x_test_vec)

    # Model training.
    # NOTE(review): the target is truncated to int so it can serve as class
    # labels; if the target is meant to be a continuous quantity, a regressor
    # is probably the better fit -- confirm the intent.
    estimator = DecisionTreeClassifier()
    estimator.fit(x_train_vec, y_train.astype('int'))

    # Model evaluation and prediction.
    y_pre = estimator.predict(x_test_vec)
    print("测试集真实值:", y_test)
    print("测试集预测值:", y_pre)
    res = estimator.score(x_test_vec, y_test.astype('int'))
    print("准确率结果res:", res)


if __name__ == '__main__':
    Demo_predict()
数据集:
2005年.csv
社会物流总额,GDP,物流消费总额,固定资产投资,进口总额,出口总额
53.23,12.13,2.52,9.82,1.8,2.0
53.44,13.13,2.67,9.86,1.95,2.13
53.55,14.33,2.84,9.88,2.36,2.61
54.73,15.76,3.02,10.1,3.31,3.51
57.79,17.35,3.25,10.67,4.5,4.75
58.72,19.31,3.5,10.84,5.29,6.1
59.6,21.77,3.84,11.0,6.34,7.76
61.92,24.86,4.23,11.43,7.97,9.75
67.44,27.24,4.61,12.45,9.36,11.46
65.82,29.75,4.58,12.15,7.86,9.62
68.19,32.9,4.73,12.58,10.32,12.64
72.69,36.03,4.99,13.42,12.42,15.2
73.49,38.8,5.12,13.56,13.41,16.41
73.71,41.79,5.25,13.6,14.45,17.69
74.08,44.84,5.35,13.67,15.33,18.76
72.74,47.93,5.43,13.43,14.88,18.21