机器学习-模型投票融合

X=mvrnorm(n,mean,AA)
#X<-draw.d.variate.uniform(n,p,AA)
把第一种生成 X 的方式换成第二种,即注释掉第一行、启用第二行,改成如下形式:
#X=mvrnorm(n,mean,AA)
X<-draw.d.variate.uniform(n,p,AA)


这里的 S1、S2 和 S3 是三个不同的数据生成式子。按这三个式子分别产生六组随机数据(S1 六组、S2 六组、S3 六组),每一组数据用各种方法各跑一遍,最后做模型融合并进行投票。此外,对每组数据中的每种方法都应计算相对偏差、MSE 等评价指标,以便在各方法之间进行比较。

上面的 P 表示维度;下面的 0.0 和 0.4 表示方差参数,是在生成随机数时设置的。这两个参数都是用来产生模拟数据的。

# encoding = 'utf-8'

import random
import numpy as np
import math

def generage_data(n, p, rho):
    """Simulate a design matrix X and a response Y for the ensemble study.

    Parameters
    ----------
    n : int
        Number of observations to generate.
    p : int
        Dimension (number of covariates).
    rho : float
        Base correlation; covariance entry (j, k) is rho**|k-j| inside a
        band of half-width 5 around the diagonal, and 0 outside it.

    Returns
    -------
    X : ndarray of shape (n, p)
        Draws from a multivariate normal with the banded covariance.
    Y : ndarray of shape (n,)
        Response built from nonlinear transforms of columns of X plus
        uniform noise.
    """
    # Banded covariance. Must be float: the original int-dtype array
    # silently truncated every rho**(k-j) entry to 0. Also fill the full
    # index range (the original skipped row/col 0, leaving it all-zero),
    # and mirror the upper triangle into the lower one (the original
    # copied the zero lower triangle over the filled upper triangle).
    matrix = np.zeros((p, p))
    for j in range(p):
        for k in range(j, min(j + 6, p)):  # j <= k <= j+5: band of width 5
            matrix[j, k] = rho ** (k - j)
            matrix[k, j] = matrix[j, k]    # keep the matrix symmetric

    mean = np.zeros(p)
    # Use n instead of the hard-coded 600 so the caller controls the sample size.
    X = np.random.multivariate_normal(mean=mean, cov=matrix, size=n)

    # Nonlinear features. Z must be float: an int array truncates sin/exp.
    Z = np.zeros((n, p))
    Z[:, 1] = np.sin(180 * X[:, 1])  # NOTE(review): 180 looks like degrees; confirm np.pi was not intended
    Z[:, 2] = np.exp(X[:, 2])
    # NOTE(review): Z[:, 3] is used below but never assigned (stays 0), so
    # every Z3 term vanishes -- confirm whether a third transform is missing.

    epsilon = np.random.rand(n)  # uniform(0, 1) noise, as in the original
    Y = (1
         - 0.8 * Z[:, 1] + 2.6 * Z[:, 2] + 1.8 * Z[:, 3]
         + 3 * Z[:, 1] ** 2 - 0.4 * Z[:, 2] ** 2 - Z[:, 3] ** 2
         - 0.5 * Z[:, 1] * Z[:, 2] + 1.4 * Z[:, 1] * Z[:, 3]
         - 0.2 * Z[:, 2] * Z[:, 3]
         + epsilon)

    return X, Y

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor as RF
from sklearn.tree import DecisionTreeRegressor as RT
from xgboost import XGBRegressor as xgb
from lightgbm import LGBMRegressor as lgb
from sklearn.neural_network import MLPRegressor as MLP
from sklearn.linear_model import LinearRegression as LM


def get_model_result(n, p, rho):
    """Fit each candidate regressor on one simulated data set and score it.

    Generates (X, Y) via generage_data, makes an 80/20 train/test split,
    fits every model, and returns one row per model:
    [p, rho, model_name, bias, sd, rmse].
    """
    X, Y = generage_data(n, p, rho)
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

    # Estimators in the original construction/fitting order.
    estimators = {
        'RF': RF(max_depth=10),
        'RT': RT(max_depth=10),
        'xgb': xgb(max_depth=10),
        'lgb': lgb(max_depth=10),
        'LM': LM(),
        'MLP': MLP(),
        'DL_1r': MLP(hidden_layer_sizes=(64, 32, 16, 8), activation='relu'),
        'DL_2r': MLP(hidden_layer_sizes=(32, 32, 32, 32), activation='relu'),
        'DL_1s': MLP(hidden_layer_sizes=(64, 32, 16, 8), activation='identity'),
        'DL_2s': MLP(hidden_layer_sizes=(32, 32, 32), activation='identity'),
    }
    # Fit in declaration order so any global RNG consumption matches the original.
    for estimator in estimators.values():
        estimator.fit(x_train, y_train)

    # Evaluate in the original reporting order (differs from the fit order
    # for the DL_* models).
    report_order = ['RF', 'RT', 'xgb', 'lgb', 'LM', 'MLP',
                    'DL_1r', 'DL_1s', 'DL_2r', 'DL_2s']
    model_result = []
    for name in report_order:
        prediction = estimators[name].predict(x_test)
        model_result.append([
            p, rho, str(name),
            get_two_bias(prediction, y_test),
            get_two_sd(prediction, y_test),
            get_rmse(prediction, y_test),
        ])

    return model_result

import pandas as pd

# Monte-Carlo driver: 5 replications x 3 dimensions x 2 correlation levels.
# Each run of get_model_result yields one row per model; all runs are
# aggregated into per-(dim, p, model) means of bias / sd / rmse.
all_df = pd.DataFrame()
collected = []
for i in range(5):
    print(i)
    for dim in [10, 400, 1000]:
        for p in [0, 0.4]:
            try:
                one_model_result = get_model_result(600, dim, p)
            except Exception as exc:
                # A failed configuration should not stop the study, but the
                # original bare `except: pass` hid every error silently.
                print(f"skipped dim={dim}, p={p}: {exc}")
                continue
            collected.append(pd.DataFrame(
                one_model_result,
                columns=['dim', 'p', 'model_name', 'bias', 'sd', 'rmse']))

# DataFrame.append was removed in pandas 2.0 -- concatenate once instead.
# Guard against every run failing (empty result set).
if collected:
    all_df = pd.concat(collected, ignore_index=True)

    grouped = all_df.groupby(['dim', 'p', 'model_name'])
    # Mean of each metric per group; equivalent to the original sum/count
    # ratio (both skip NaN values).
    rmse_sd_bias_count = grouped[['bias', 'sd', 'rmse']].mean().reset_index()
    # Replication count per group (non-null bias, matching the original count).
    rmse_sd_bias_count.insert(3, 'count', grouped['bias'].count().values)

    rmse_sd_bias_count.to_csv("result.csv", encoding='utf-8', index=False)


import math


def get_average(records):
    """Return the arithmetic mean of *records*."""
    total = sum(records)
    count = len(records)
    return total / count


def get_variance(records):
    """Return the population variance of *records* (spread around the mean)."""
    mu = get_average(records)
    squared_deviations = [(value - mu) ** 2 for value in records]
    return sum(squared_deviations) / len(records)


def get_standard_deviation(records):
    """Return the population standard deviation: the square root of the variance."""
    return math.sqrt(get_variance(records))


def get_mse(records_real, records_predict):
    """Return the mean squared error between two equal-length sequences.

    Returns None when the lengths differ, mirroring the other metric helpers.
    """
    if len(records_real) != len(records_predict):
        return None
    squared_errors = [(a - b) ** 2 for a, b in zip(records_real, records_predict)]
    return sum(squared_errors) / len(records_real)


def get_rmse(records_real, records_predict):
    """Return the root mean squared error between two equal-length sequences.

    Returns None when the sequence lengths differ (propagated from get_mse).

    Fix: the original tested `if mse:`, so a perfect fit (mse == 0, which is
    falsy) wrongly returned None instead of 0.0. Test explicitly for None.
    """
    mse = get_mse(records_real, records_predict)
    if mse is None:
        return None
    return math.sqrt(mse)


def get_mae(records_real, records_predict):
    """Return the mean absolute error, or None when the lengths differ."""
    if len(records_real) != len(records_predict):
        return None
    total_error = sum(abs(a - b) for a, b in zip(records_real, records_predict))
    return total_error / len(records_real)
    
def get_two_bias(records_real, records_predict):
    """Return the variance of the residuals, or None when the lengths differ.

    NOTE(review): despite the name, this computes the residual variance, not
    a mean bias -- kept as-is to preserve the original behavior.
    """
    if len(records_real) != len(records_predict):
        return None
    residuals = [a - b for a, b in zip(records_real, records_predict)]
    return get_variance(residuals)

def get_two_sd(records_real, records_predict):
    """Return the standard deviation of the residuals, or None when the lengths differ."""
    if len(records_real) != len(records_predict):
        return None
    residuals = [a - b for a, b in zip(records_real, records_predict)]
    return get_standard_deviation(residuals)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值