import pandas as pd
def cut_down(filepath):
    """Load a CSV file and return its numeric columns downcast to smaller dtypes.

    The file is read with ``pandas.read_csv``, missing values are replaced
    with 0, and only the ``int64`` and ``float64`` columns are kept. Each kept
    column is passed through ``pandas.to_numeric`` with ``downcast`` so that it
    uses the smallest integer / float dtype that can hold its values. Columns
    of any other dtype (e.g. strings) are not included in the result.

    A summary of the frame before and after the conversion is printed via
    ``DataFrame.info()``.

    Args:
        filepath: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        A new ``DataFrame`` holding the downcast integer columns followed by
        the downcast float columns.
    """
    train_data = pd.read_csv(filepath)
    train_data.info()
    # fillna() returns a new frame; the original code discarded the result,
    # so the NaNs were never actually replaced.
    train_data = train_data.fillna(0)
    print("=" * 10)

    gl_float = train_data.select_dtypes(include=['float64'])
    converted_float = gl_float.apply(pd.to_numeric, downcast='float')
    gl_int = train_data.select_dtypes(include=['int64'])
    converted_int = gl_int.apply(pd.to_numeric, downcast='integer')

    # Integer columns first, then float columns (same order as before).
    temp = pd.concat([converted_int, converted_float], axis=1)

    # info() prints by itself and returns None, so it must not be wrapped in
    # print() (that emitted a stray "None" line).
    temp.info()
    print("=" * 10)
    return temp
import pandas as pd
import numpy as np
import downcast_demo
from sklearn.model_selection import train_test_split
import warnings
# Silence every warning for the remainder of the run.
warnings.filterwarnings("ignore")

# Read the training and test CSVs through downcast_demo.cut_down; a progress
# line is printed before each file is processed.
train_file, test_file = 'tap_fun_train.csv', 'tap_fun_test.csv'

print("changing...")
train_data = downcast_demo.cut_down(train_file)

print("changing...")
test_data = downcast_demo.cut_down(test_file)
def feature(data, feature_columns, label=None):
    """Extract the requested feature matrix and, optionally, the label vector.

    Args:
        data: DataFrame to read from.
        feature_columns: List of column names to use as features.
        label: Column name (or single-element list of names) of the target.
            When omitted, only the feature matrix is returned.

    Returns:
        ``X`` when ``label`` is None, otherwise the tuple ``(X, y)`` where
        ``X`` is the array of the selected feature columns and ``y`` is the
        label values reshaped to a 1-D array with one entry per row.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and exists in every pandas version.
    X = data[feature_columns].values
    if label is None:
        return X

    y = np.array(data[label].values)
    # Flatten an (n, 1) selection to shape (n,).
    y = y.reshape(len(y))
    return X, y
def trainandTest(X_train_data, y_train_data, test_x):
    """Train a LightGBM regressor and return its predictions for ``test_x``.

    The supplied training data is split 80/20 (``random_state=21``). The model
    is fitted on the 80% part and the 20% hold-out part is used as the
    evaluation set for RMSE-based early stopping and for an offline check
    whose predictions and true values are printed.

    Args:
        X_train_data: Feature matrix of the labelled data.
        y_train_data: Target values aligned with ``X_train_data``.
        test_x: Feature matrix to produce final predictions for.

    Returns:
        The model's predictions for ``test_x``.
    """
    # Offline validation split.
    X_train, X_test, y_train, y_test = train_test_split(
        X_train_data, y_train_data, test_size=0.2, random_state=21)

    # from sklearn import neighbors
    # model = neighbors.KNeighborsRegressor()
    import lightgbm as lgb

    model = lgb.LGBMRegressor(
        boosting_type='gbdt', num_leaves=31, reg_alpha=0.0, reg_lambda=1,
        # The target is a continuous value scored with RMSE, so a regression
        # objective is required; 'binary' is a classification objective.
        max_depth=-1, n_estimators=2000, objective='regression',
        subsample=0.7, colsample_bytree=0.7, subsample_freq=1,
        learning_rate=0.05, min_child_weight=50, random_state=2018, n_jobs=-1
    )

    # Fit on the split made above rather than on module-level globals, and
    # early-stop on the hold-out part so the stopping signal is not computed
    # on the data being fitted. The early_stopping callback is used instead of
    # the `early_stopping_rounds` fit argument, which newer LightGBM releases
    # no longer accept.
    model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='rmse',
        callbacks=[lgb.early_stopping(stopping_rounds=100)],
    )
    print("model fiting...")

    # Offline check: show hold-out predictions next to the true values.
    pre = model.predict(X_test)
    print("pre:")
    print(pre)
    print(y_test)

    test_pre = model.predict(test_x)
    return test_pre
    # Show important features:
    # plot_importance(model)
    # plt.show()
if __name__ == "__main__":
    # Columns fed to the model as input features.
    feature_names = [
        'pvp_battle_count', 'pvp_lanch_count', 'pvp_win_count',
        'pve_battle_count', 'pve_lanch_count', 'pve_win_count',
        'pay_count', 'pay_price', 'avg_online_minutes',
    ]

    # Labelled data: feature matrix plus the 'prediction_pay_price' target.
    train_x, train_y = feature(train_data, feature_names, 'prediction_pay_price')
    train_x = np.array(train_x)
    print('train_x, train_y')
    print(train_x.shape, '\n', train_y.shape)

    # Unlabelled data: features only.
    test_x = np.array(feature(test_data, feature_names))
    print('test_x')
    print(test_x.shape, '\n')

    print("traing")
    predictions = trainandTest(train_x, train_y, test_x)

    # Write one prediction per test user to the submission file.
    print("wring..")
    submission = pd.DataFrame()
    submission['user_id'] = test_data['user_id']
    submission['prediction_pay_price'] = predictions
    submission.to_csv("sublgb.csv", index=False)