import tensorflow as tf
import matplotlib. pyplot as plt
% matplotlib inline
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn. datasets import load_iris
from sklearn. model_selection import train_test_split
from sklearn. metrics import accuracy_score
from sklearn. datasets import load_boston
# Read the training features, test features and training labels, discarding
# the 'Id' column of each table straight after loading.
df_x_train = pd.read_csv('减少整理.csv').drop(columns='Id')
df_x_test = pd.read_csv('减少整理test.csv').drop(columns='Id')
df_y_train = pd.read_csv('label.csv').drop(columns='Id')

# Plain NumPy copies of the three tables, used by the modelling code below.
x_train, y_train, x_test = (
    np.array(frame) for frame in (df_x_train, df_y_train, df_x_test)
)
x_train
array([[37.376 , 6.252 , 5.748 , ..., 92.69918886,
15.72122343, 65.29502881],
[37.362 , 6.762 , 5.238 , ..., 92.15746543,
15.47566709, 64.85981882],
[37.376 , 6.254 , 5.746 , ..., 92.1416865 ,
15.72226807, 65.31301408],
...,
[41.189 , 7.18 , 8.82 , ..., 89.55632469,
5. , 73.57027283],
[41.199 , 7.531 , 8.469 , ..., 89.53275236,
5. , 73.43826769],
[41.291 , 7.418 , 8.582 , ..., 89.55424014,
5. , 73.32689229]])
def my_loss(st, sp):
    """Return the weighted exponential error between true and predicted values.

    Every element contributes ``w * e * a`` where

    * ``e = exp(|st - sp| / 0.012) - 1`` is the base error;
    * ``w`` is 10 when the prediction lies on the other side of a tolerance
      limit than the true value (true value inside the band but prediction
      outside it, or true value outside the band but prediction not beyond
      the same limit), and 1 otherwise;
    * ``a = 100 * b + 1`` where ``b`` is the distance of the true value from
      the limit it violates (0 when the true value is inside the band).

    Column 0 uses the band [299.85, 300.15]; every other column uses
    [199.925, 200.075].

    NOTE(review): the original source had lost its indentation. The three
    true-value cases (inside the band / below it / above it) are treated as
    mutually exclusive here, so an in-band true value never receives a
    distance penalty. Confirm against the reference metric definition.

    Args:
        st: 2-D array-like of true values, one row per example.
        sp: 2-D array-like of predicted values with the same shape as ``st``.

    Returns:
        The sum of all element contributions divided by the number of rows.
    """
    st = np.asarray(st, dtype=float)
    sp = np.asarray(sp, dtype=float)
    num_example = sp.shape[0]
    num_size = sp.shape[1]

    # Per-column tolerance limits; slicing keeps this safe for zero columns.
    lower = np.full(num_size, 199.925)
    upper = np.full(num_size, 200.075)
    lower[:1] = 299.85
    upper[:1] = 300.15

    e = np.exp(np.abs(st - sp) / 0.012) - 1

    # Classify each true value into exactly one of the three cases.
    true_in_band = (st >= lower) & (st <= upper)
    true_below = st < lower
    true_above = ~(true_in_band | true_below)

    # A prediction is on the wrong side when it disagrees with the true
    # value about whether the relevant limit is violated.
    pred_outside = (sp < lower) | (sp > upper)
    wrong_side = (
        (true_in_band & pred_outside)
        | (true_below & (sp >= lower))
        | (true_above & (sp <= upper))
    )
    w = np.where(wrong_side, 10.0, 1.0)

    # Distance of an out-of-band true value from the limit it violates.
    b = np.zeros(sp.shape)
    b = np.where(true_below, np.abs(st - lower), b)
    b = np.where(true_above, np.abs(st - upper), b)
    a = 100 * b + 1

    score = np.sum(w * e * a) / float(num_example)
    return score
# Booster settings reused by every model trained in this notebook.
params = {
    'booster': 'gbtree',
    'objective': 'reg:squarederror',
    'max_depth': 10,
    'lambda': 2,
    'subsample': 0.6,
    'colsample_bytree': 0.6,
    'min_child_weight': 3,
    'eta': 0.05,
    'seed': 1000,
}
# xgb.train is given the settings as a list of (name, value) pairs.
plst = [*params.items()]

# First model: fit on label column 0, then predict the test features.
num_rounds = 340
dtrain = xgb.DMatrix(x_train, label=y_train[:, 0])
dtest = xgb.DMatrix(x_test)
model = xgb.train(plst, dtrain, num_boost_round=num_rounds)
y_pred1 = model.predict(dtest)
y_pred1
array([300.06595, 300.06448, 300.05875, ..., 300.0341 , 300.03125,
300.03802], dtype=float32)
# Second model: same settings, fitted on label column 1.
num_rounds = 340
dtrain = xgb.DMatrix(x_train, label=y_train[:, 1])
dtest = xgb.DMatrix(x_test)
model = xgb.train(plst, dtrain, num_boost_round=num_rounds)
y_pred2 = model.predict(dtest)
y_pred2
array([199.97592, 199.96873, 199.96329, ..., 200.10896, 200.10452,
200.07726], dtype=float32)
# Third model: same settings, fitted on label column 2.
num_rounds = 340
dtrain = xgb.DMatrix(x_train, label=y_train[:, 2])
dtest = xgb.DMatrix(x_test)
model = xgb.train(plst, dtrain, num_boost_round=num_rounds)
y_pred3 = model.predict(dtest)
y_pred3
array([199.9959 , 199.99684, 199.99486, ..., 200.00102, 199.9969 ,
200.00063], dtype=float32)
# Combine the three per-target prediction vectors into one float64 matrix
# with one column per target. The number of rows comes from the predictions
# themselves rather than a hard-coded count, so this keeps working when the
# test set has a different number of samples.
y_pred = np.column_stack((y_pred1, y_pred2, y_pred3)).astype(np.float64)
y_pred
array([[300.06594849, 199.97592163, 199.99589539],
[300.06448364, 199.96873474, 199.99684143],
[300.05874634, 199.96328735, 199.99485779],
...,
[300.03408813, 200.10896301, 200.00102234],
[300.03125 , 200.10452271, 199.99690247],
[300.0380249 , 200.07725525, 200.00062561]])
# Wrap the prediction matrix in a DataFrame and export it as CSV using
# pandas' default to_csv options.
df = pd.DataFrame(data=y_pred)
df.to_csv(path_or_buf='new/提交6.csv')