作业代码（交作业用）

Da-qiong
已于 2022-06-06 18:33:54 修改
阅读量168
点赞数 1
文章标签： python sklearn 机器学习
于 2022-06-04 19:30:32 首次发布
本文链接：https://blog.csdn.net/qq_52008148/article/details/125124647
版权
一份作业代码

随机森林回归

楼主彩笔

from sklearn import datasets
import sklearn.ensemble
import numpy as np
from typing import Union
import math
from sklearn.model_selection import train_test_split


# Load the diabetes regression data. Unpacking with return_X_y=True keeps the
# imported `datasets` module name bound to the module instead of rebinding it
# to the returned Bunch object.
# Per the scikit-learn documentation: X is (442, 10) features, y is (442,) targets.
X, y = datasets.load_diabetes(return_X_y=True)

# print(X)
# print(y)

#X_train,X_test, y_train, y_test = train_test_split(X,y,test_size = 0.8, random_state = 42)


class MetaLearner(object):
    """Regression tree grown greedily by variance reduction.

    The fitted tree is stored in ``self.node``. An internal node is a
    one-entry dict ``{(feature_index, threshold): {"<": left, ">=": right}}``
    where rows with ``feature < threshold`` belong to ``left``; a leaf is a
    plain number holding the predicted target value.
    """

    def __init__(self,
                 min_samples: int = 5,
                 min_gain: float = 0,
                 max_depth: int = 20,
                 max_leaves: Union[int, None] = None,
                 ):
        """Store the growth limits.

        Args:
            min_samples: a node with this many rows or fewer becomes a leaf.
            min_gain: stored on the instance; not enforced by ``tree``.
            max_depth: nodes at this depth (root is 0) become leaves.
            max_leaves: stored on the instance; not enforced by ``tree``.
        """
        self.max_depth = max_depth
        self.min_samples = min_samples
        # Kept so the full configuration is inspectable on the instance.
        self.min_gain = min_gain
        self.max_leaves = max_leaves

    def cal_val(self, x_y):
        """Return the population variance of the last column of ``x_y``.

        An array with no rows yields 0 so that an empty side of a split
        contributes nothing to the weighted variance.
        """
        targets = x_y[:, -1]
        if targets.size == 0:
            return 0
        return np.var(targets)

    def find_best_thre(self, x_y, attr):
        """Find the threshold on column ``attr`` that minimises child variance.

        Every distinct value of the column is tried as a threshold; rows with
        ``value < threshold`` form one side and the remaining rows the other.

        Args:
            x_y: 2-D array whose last column is the target.
            attr: index of the feature column to split on.

        Returns:
            ``(threshold, gain)`` where ``gain`` is the parent variance minus
            the size-weighted variance of the two sides at that threshold.

        Raises:
            ValueError: if ``x_y`` has no rows.
        """
        column = x_y[:, attr]
        n_rows = len(column)
        if n_rows == 0:
            raise ValueError("cannot search for a threshold on an empty sample set")

        parent_var = self.cal_val(x_y)
        best_var = math.inf
        best_thre = None
        # Duplicate values would produce identical splits, so try each once.
        for thre in np.unique(column):
            below = column < thre
            n_below = np.count_nonzero(below)
            split_var = (
                n_below / n_rows * self.cal_val(x_y[below])
                + (n_rows - n_below) / n_rows * self.cal_val(x_y[~below])
            )
            if split_var < best_var:
                best_var = split_var
                best_thre = thre
        # Report the gain of the chosen threshold, not of the last one tried.
        return best_thre, parent_var - best_var

    def find_ave(self, x_y):
        """Return the mean of the last (target) column of ``x_y``.

        Raises:
            ValueError: if ``x_y`` has no rows.
        """
        targets = x_y[:, -1]
        if targets.size == 0:
            raise ValueError("cannot average the targets of an empty sample set")
        return np.mean(targets)

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Grow the tree on ``X`` / ``y`` and store it in ``self.node``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples targets.

        Returns:
            The fitted tree (the same object stored in ``self.node``).

        Raises:
            ValueError: if ``X`` is not a non-empty 2-D array or the number
                of targets does not match the number of rows.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array")
        if len(y) != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        # Carry the target as the last column so row masks split both at once.
        x_y = np.hstack((X, y.reshape(len(y), 1)))
        attr_list = list(range(X.shape[1]))
        self.node = self.tree(x_y, attr_list, 0)
        return self.node

    def tree(self, x_y, attr_list, depth):
        """Recursively build the subtree for the rows in ``x_y``.

        Args:
            x_y: 2-D array whose last column is the target.
            attr_list: feature column indices that may be split on.
            depth: depth of the node being built (root is 0).

        Returns:
            A leaf value or an internal-node dict (see the class docstring).

        Raises:
            ValueError: if ``x_y`` has no rows.
        """
        if len(x_y) == 0:
            raise ValueError("cannot build a tree node from an empty sample set")

        targets = x_y[:, -1]
        if depth >= self.max_depth:
            return self.find_ave(x_y)
        if np.all(targets == targets[0]):
            # Pure node: use the value itself to avoid rounding in a mean.
            return targets[0]
        if len(x_y) <= self.min_samples:
            return self.find_ave(x_y)

        best_gain = -math.inf
        best_attr = None
        best_thre = None
        for attr in attr_list:
            thre, gain = self.find_best_thre(x_y, attr)
            if gain > best_gain:
                best_gain, best_attr, best_thre = gain, attr, thre
        if best_attr is None:
            # No usable split (e.g. no candidate attributes): make a leaf.
            return self.find_ave(x_y)

        goes_left = x_y[:, best_attr] < best_thre
        if not goes_left.any() or goes_left.all():
            # The best split leaves one side empty, so the other side is the
            # same row set and recursing would only repeat this search until
            # max_depth and end at the mean; return it directly.
            return self.find_ave(x_y)

        left = self.tree(x_y[goes_left], attr_list, depth + 1)
        right = self.tree(x_y[~goes_left], attr_list, depth + 1)

        # Collapse identical children; compare only like with like so a dict
        # is never compared against a number.
        if isinstance(left, dict) == isinstance(right, dict) and left == right:
            return left
        return {(best_attr, best_thre): {"<": left, ">=": right}}

    def predictone(self, node, x):
        """Return the prediction for one sample ``x`` by walking ``node``.

        Uses the same strict ``<`` comparison as ``tree`` so that a value
        equal to the threshold follows the ``">="`` branch.
        """
        while isinstance(node, dict):
            (attr, thre), children = next(iter(node.items()))
            node = children["<"] if x[attr] < thre else children[">="]
        return node

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return an array with one prediction per row of ``X``."""
        return np.array([self.predictone(self.node, row) for row in X])



class trees(MetaLearner):
    """Ensemble that trains one MetaLearner per split and averages them."""

    def __init__(self, X_train_list, y_train_list, X_test_list, y_test_list):
        """Store the per-split data.

        Args:
            X_train_list: sequence of training feature arrays, one per tree.
            y_train_list: sequence of training target arrays, one per tree.
            X_test_list: sequence of test feature arrays.
            y_test_list: sequence of test target arrays, parallel to
                ``X_test_list``.
        """
        super().__init__()
        self.X_train_list = X_train_list
        self.y_train_list = y_train_list
        self.X_test_list = X_test_list
        self.y_test_list = y_test_list

    def vote(self, min_samples, max_depth):
        """Fit one tree per training split and score the averaged prediction.

        One test split is picked at random (via ``np.random``), every tree
        predicts on it, and the per-sample predictions are averaged.

        Args:
            min_samples: forwarded to each MetaLearner as ``min_samples``.
            max_depth: forwarded to each MetaLearner as ``max_depth``.

        Returns:
            ``(pre, ac)``: ``pre`` is a list with one averaged prediction per
            sample of the chosen test split, ``ac`` is the mean squared error
            of ``pre`` against that split's targets.

        Raises:
            ValueError: if there are no training or test splits.
        """
        if len(self.X_train_list) == 0 or len(self.X_test_list) == 0:
            raise ValueError("at least one training and one test split are required")

        learners = []
        for X_train, y_train in zip(self.X_train_list, self.y_train_list):
            # Keywords matter: the second positional parameter of
            # MetaLearner is min_gain, not max_depth.
            learner = MetaLearner(min_samples=min_samples, max_depth=max_depth)
            learner.fit(X_train, y_train)
            learners.append(learner)

        # NOTE(review): the chosen test split may share samples with the
        # training data of the other trees - confirm this is intended.
        flag = np.random.randint(0, len(self.X_test_list))
        X_test = self.X_test_list[flag]
        y_test = self.y_test_list[flag]

        # Rows are trees, columns are test samples; average over the trees.
        per_tree = np.array([learner.predict(X_test) for learner in learners])
        averaged = per_tree.mean(axis=0)
        pre = averaged.tolist()

        ac = float(np.mean((averaged - np.asarray(y_test)) ** 2))
        return pre, ac




# Shuffle 50 candidate seeds; the first ten each drive one train/test split.
seed_list = list(range(50))
np.random.shuffle(seed_list)

X_train_list, X_test_list = [], []
y_train_list, y_test_list = [], []
for i, split_seed in enumerate(seed_list[:10]):
    # test_size=0.8 is passed straight through to train_test_split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.8, random_state=split_seed
    )
    X_train_list.append(X_train)
    X_test_list.append(X_test)
    y_train_list.append(y_train)
    y_test_list.append(y_test)

# Train one tree per split, then print the averaged predictions and the score.
m = trees(X_train_list, y_train_list, X_test_list, y_test_list)
pre, ac = m.vote(min_samples=5, max_depth=20)

print(pre)
print(ac)

# scikit-learn's built-in RandomForestRegressor, kept for comparison
# skl_rf = sklearn.ensemble.RandomForestRegressor(n_estimators=10)
# skl_rf.fit(X_train, y_train)
# predict_skl = skl_rf.predict(X_test)
# count = 0
# for i in range(len(y_test)):
#     count += (predict_skl[i] - y_test[i])**2
# skl_ac = count / len(y_test)
# print("skl_ac: ",skl_ac)