# A simple classification script — uses scikit-learn (the original note said Keras, but no Keras is used).
# coding=utf-8
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score, train_test_split, ShuffleSplit, learning_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression, BayesianRidge, ARDRegression, Lasso, SGDRegressor, SGDClassifier
import sklearn.preprocessing as preprocessing
# Configure matplotlib so CJK labels and minus signs render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Load the GBK-encoded training CSV; the first column is the class label.
data_train = pd.read_csv("C:/Users/qiuxu/Desktop/qdata.csv", encoding='gbk')
df = data_train
# Drop two feature columns that are excluded from training.
df.drop(['hatC20', 'hatC40'], axis=1, inplace=True)

# Column 0 is the (integer) label; the remaining columns are features.
train_np = df.values
y = train_np[:, 0].astype('int')
X = train_np[:, 1:]
x_train, x_test, y_train, y_test = train_test_split(X, y)

# BUG FIX: n_estimators='warn' was an internal deprecation sentinel in old
# scikit-learn; passing that string raises a TypeError on current versions.
# Use an explicit integer forest size instead (100 is the modern default).
clf = RandomForestClassifier(n_estimators=100, max_depth=None, max_features=1)
clf.fit(x_train, y_train)
predictions = clf.predict(x_test)

# BUG FIX: 1 - sum(|pred - y|)/n equals accuracy only when labels are 0/1;
# for any other label set it is wrong. Report the exact-match fraction,
# which is the proper accuracy for arbitrary class labels.
print((predictions == y_test).mean())