第四届工业大数据代码-提交2（自写）

最新推荐文章于 2024-07-07 21:34:25 发布

jianghuchuanwen

最新推荐文章于 2024-07-07 21:34:25 发布

阅读量243

点赞数

文章标签：机器学习

本文链接：https://blog.csdn.net/wojiaoawenlong/article/details/108851335

版权

import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np

import xgboost as xgb
from xgboost import plot_importance,plot_tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_boston

    # Read the two feature tables and the label table, discarding the 'Id'
    # column of each one as soon as it is loaded.
    df_x_train = pd.read_csv('减少整理.csv').drop(columns='Id')
    df_x_test = pd.read_csv('减少整理test.csv').drop(columns='Id')
    df_y_train = pd.read_csv('label.csv').drop(columns='Id')

    # ndarray copies of the frames, used by the xgboost calls further down.
    x_train = np.array(df_x_train)
    x_test = np.array(df_x_test)
    y_train = np.array(df_y_train)

    # NOTE: no df_y_test is loaded in this cell, so the matching y_test
    # conversion stays disabled.
    #     df_y_test.drop('Id', axis=1, inplace=True)
    #     y_test = np.array(df_y_test)

x_train

array([[37.376     ,  6.252     ,  5.748     , ..., 92.69918886,
        15.72122343, 65.29502881],
       [37.362     ,  6.762     ,  5.238     , ..., 92.15746543,
        15.47566709, 64.85981882],
       [37.376     ,  6.254     ,  5.746     , ..., 92.1416865 ,
        15.72226807, 65.31301408],
       ...,
       [41.189     ,  7.18      ,  8.82      , ..., 89.55632469,
         5.        , 73.57027283],
       [41.199     ,  7.531     ,  8.469     , ..., 89.53275236,
         5.        , 73.43826769],
       [41.291     ,  7.418     ,  8.582     , ..., 89.55424014,
         5.        , 73.32689229]])

def my_loss(st, sp):
    """Score predictions against true values with tolerance-aware penalties.

    Each entry contributes ``w * e * a`` where

    * ``e = exp(|st - sp| / 0.012) - 1`` grows with the absolute error,
    * ``a = 100 * b + 1`` with ``b`` the distance of the TRUE value outside
      its tolerance band (``b == 0`` when the true value is inside the band),
    * ``w`` is 10 when prediction and truth disagree about the band
      (truth inside but prediction outside; truth below the lower limit but
      prediction not below it; truth above the upper limit but prediction
      not above it) and 1 otherwise.

    Column 0 uses the band [299.85, 300.15]; every other column uses
    [199.925, 200.075].

    Args:
        st: 2-D array-like of true values, shape (num_example, num_size).
        sp: 2-D array-like of predicted values, same shape as ``st``.

    Returns:
        Sum of all per-entry contributions divided by the number of rows.
    """
    st = np.asarray(st, dtype=float)
    sp = np.asarray(sp, dtype=float)
    num_example, num_size = sp.shape

    # Per-column tolerance limits; only the first column has the 300 band.
    lower = np.full(num_size, 199.925)
    upper = np.full(num_size, 200.075)
    lower[:1] = 299.85
    upper[:1] = 300.15

    err = np.exp(np.abs(st - sp) / 0.012) - 1

    # The three cases for the true value are mutually exclusive. The earlier
    # loop used a second plain `if`, so in-band true values also fell into the
    # "above upper limit" branch and were penalised as if out of tolerance.
    true_low = st < lower
    true_high = st > upper
    true_ok = ~(true_low | true_high)
    pred_low = sp < lower
    pred_high = sp > upper

    # Distance of the true value outside its band (0 inside the band).
    b = np.where(true_low, lower - st, np.where(true_high, st - upper, 0.0))

    mismatch = (
        (true_ok & (pred_low | pred_high))
        | (true_low & ~pred_low)
        | (true_high & ~pred_high)
    )
    w = np.where(mismatch, 10.0, 1.0)

    a = 100 * b + 1
    return np.sum(w * err * a) / float(num_example)

# Booster hyper-parameters; the same settings are passed to every xgb.train call.
params = {
        'booster': 'gbtree',  # tree-based booster
        'objective': 'reg:squarederror', # regression loss: squared error (gamma regression would be 'reg:gamma')
        'max_depth': 10,  # maximum depth of each tree
        'lambda': 2,  # L2 regularisation term on leaf weights
        'subsample': 0.6,  # fraction of training rows sampled per boosting round
        'colsample_bytree': 0.6,  # fraction of feature columns sampled per tree
        'min_child_weight':3,  # minimum sum of instance weight required in a child
        'eta': 0.05,  # learning rate (step-size shrinkage)
        'seed': 1000,  # random seed
        
    }
# xgb.train is handed the parameters as a list of (name, value) pairs.
plst = list(params.items())

# Target column 0: fit a booster on the training matrix, then score the test set.
num_rounds = 340  # number of boosting rounds
dtest = xgb.DMatrix(data=x_test)
dtrain = xgb.DMatrix(data=x_train, label=y_train[:, 0])

model = xgb.train(params=plst, dtrain=dtrain, num_boost_round=num_rounds)
y_pred1 = model.predict(dtest)

y_pred1

array([300.06595, 300.06448, 300.05875, ..., 300.0341 , 300.03125,
       300.03802], dtype=float32)

# Target column 1: fit a booster on the training matrix, then score the test set.
num_rounds = 340  # number of boosting rounds
dtest = xgb.DMatrix(data=x_test)
dtrain = xgb.DMatrix(data=x_train, label=y_train[:, 1])
model = xgb.train(params=plst, dtrain=dtrain, num_boost_round=num_rounds)
y_pred2 = model.predict(dtest)

y_pred2

array([199.97592, 199.96873, 199.96329, ..., 200.10896, 200.10452,
       200.07726], dtype=float32)

# Target column 2: fit a booster on the training matrix, then score the test set.
num_rounds = 340  # number of boosting rounds
dtest = xgb.DMatrix(data=x_test)
dtrain = xgb.DMatrix(data=x_train, label=y_train[:, 2])
model = xgb.train(params=plst, dtrain=dtrain, num_boost_round=num_rounds)
y_pred3 = model.predict(dtest)

y_pred3

array([199.9959 , 199.99684, 199.99486, ..., 200.00102, 199.9969 ,
       200.00063], dtype=float32)

# Stack the three per-target prediction vectors side by side into one
# (n_samples, 3) array. The row count now follows the predictions instead of
# the hard-coded 3953, so a test set of any length works. The cast keeps the
# float64 dtype that the zero-initialised array had.
y_pred = np.column_stack((y_pred1, y_pred2, y_pred3)).astype(np.float64)

y_pred

array([[300.06594849, 199.97592163, 199.99589539],
       [300.06448364, 199.96873474, 199.99684143],
       [300.05874634, 199.96328735, 199.99485779],
       ...,
       [300.03408813, 200.10896301, 200.00102234],
       [300.03125   , 200.10452271, 199.99690247],
       [300.0380249 , 200.07725525, 200.00062561]])

# y_test

# accuracy = my_loss(y_test, y_pred)
# print('accuracy:'+str(accuracy))

# Wrap the stacked predictions in a DataFrame and write the submission file.
df = pd.DataFrame(data=y_pred)
df.to_csv(path_or_buf='new/提交6.csv')

# plt.plot(num_rounds,accuracy,label="accuracy")
# plt.legend()

jianghuchuanwen

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
第四届工业大数据代码-提交2（自写）

import tensorflow as tf; import matplotlib.pyplot as plt; %matplotlib inline; import pandas as pd; import numpy as np; import xgboost as xgb; from xgboost import plot_importance, plot_tree; from sklearn.datasets import load_iris; from sklearn.model_selection import …
复制链接

扫一扫