autokeras--自动机器学习模型训练

模型

  • 分类模型反映的是样本在不同类别上的概率
  • 回归模型反映的是推测的可能值
  • 准备数据集
  • 数据集预处理
  • 模型训练
  • 模型导出
  • 模型加载部署
  • 模型预测

封装数据集与模型训练


from tensorflow.keras.datasets import mnist
import abc
import os
import numpy as np
import tensorflow as tf
from sklearn.datasets import load_files
from tensorflow.keras.models import load_model
import autokeras as ak
import requests
import cv2


class ABCDatasets(metaclass=abc.ABCMeta):
    """Abstract dataset interface; subclasses must override every member below."""

    @abc.abstractmethod
    def load_data(self):
        """Load the dataset.

        Declared as a regular abstract method (not a property): the concrete
        datasets in this file implement it as a method and callers invoke it
        as ``data.load_data()``.
        """

    @property
    @abc.abstractmethod
    def train_data(self):
        """Training data; ``Model`` unpacks it as an ``(x, y)`` pair."""

    @property
    @abc.abstractmethod
    def test_data(self):
        """Test data; ``Model`` unpacks it as an ``(x, y)`` pair."""

    @property
    @abc.abstractmethod
    def label_mapping(self):
        """Mapping from a predicted label/index to the value reported to the user."""
        return {}


class ABCModel(metaclass=abc.ABCMeta):
    """Abstract model interface; every method below must be overridden."""

    @abc.abstractmethod
    def train(self):
        """Fit the model."""

    @abc.abstractmethod
    def export_model(self, filename):
        """Save the model to ``filename``."""

    @abc.abstractmethod
    def load_model(self, filename):
        """Restore a model from ``filename``."""

    @abc.abstractmethod
    def predict(self, image: list):
        """Run prediction on the given input.

        NOTE(review): the concrete ``Model.predict`` names this parameter
        ``images``; keyword callers should be checked against that.
        """


class Model(ABCModel):
    """Base wrapper that binds a dataset to a model object held in ``modeler``.

    Subclasses are expected to provide ``modeler`` and may override ``post``
    to turn raw predictions into labels.
    """

    # Provided by subclasses; load_model() replaces it on the instance with
    # the model returned by tensorflow.keras.models.load_model.
    modeler: ak.AutoModel

    def __init__(self, datasets: ABCDatasets = None):
        """Cache the dataset splits and label mapping on the instance.

        :param datasets: dataset provider; when omitted (``None``) the data
            attributes are set to ``None`` and the label mapping to ``{}``
            instead of failing with ``AttributeError``.
        """
        self.datasets = datasets
        if datasets is None:
            # The signature advertises None as the default, so it must not crash.
            self.x_train = self.y_train = None
            self.x_test = self.y_test = None
            self.label_mapping = {}
        else:
            self.x_train, self.y_train = datasets.train_data
            self.x_test, self.y_test = datasets.test_data
            self.label_mapping = datasets.label_mapping

    def train(self):
        """Fit ``modeler`` on the training split for a single epoch."""
        self.modeler.fit(self.x_train, self.y_train, epochs=1)

    def export_model(self, filename):
        """Export the underlying model from ``modeler`` and save it to ``filename``."""
        self.modeler.export_model().save(filename)

    def load_model(self, filename):
        """Load a saved model from ``filename`` and use it as ``modeler``.

        ``ak.CUSTOM_OBJECTS`` is passed so that AutoKeras-specific layers can
        be deserialized.
        """
        self.modeler = load_model(filename, custom_objects=ak.CUSTOM_OBJECTS)

    def predict(self, images: np.array):
        """Return the raw predictions of ``modeler`` for ``images``.

        :param images: input samples; converted with ``np.array`` before
            being handed to the model.
        """
        return self.modeler.predict(np.array(images))

    def post(self, predict_result):
        """Post-process raw predictions; the base implementation returns ``None``."""
        return


class MnistDataSets(ABCDatasets):
    """MNIST handwritten-digit dataset together with its label mapping."""

    # Sample image and its expected digit used by get_online_test_data().
    DEFAULT_TEST_IMAGE_URL = "https://img1.baidu.com/it/u=3472197447,93830654&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=281"
    DEFAULT_TEST_IMAGE_LABEL = 3

    def __init__(self):
        # All splits stay None until load_data() is called.
        self.x_train = self.y_train = self.x_test = self.y_test = None

    def load_data(self):
        """Load the official MNIST handwritten-digit dataset into the instance."""
        (self.x_train, self.y_train), (self.x_test, self.y_test) = mnist.load_data()
        # Shapes noted by the original author:
        #   x_train.shape    -> (60000, 28, 28)
        #   y_train.shape    -> (60000,)
        #   x_train[0].shape -> (28, 28)
        # i.e. 60000 single-channel 28x28 images.

    @property
    def label_mapping(self):
        """Label mapping: each digit 0-9 maps to itself."""
        return {digit: digit for digit in range(10)}

    @property
    def train_data(self):
        """Training split as ``(x_train, y_train)``."""
        return self.x_train, self.y_train

    @property
    def test_data(self):
        """Test split as ``(x_test, y_test)``."""
        return self.x_test, self.y_test

    def get_online_test_data(
        self,
        url=DEFAULT_TEST_IMAGE_URL,
        label=DEFAULT_TEST_IMAGE_LABEL,
        timeout=10,
    ):
        """Download one handwritten-digit image and pre-process it.

        The image is decoded as grayscale, resized to 28x28 and binarized
        with a threshold of 165.

        :param url: image location; defaults to the bundled sample URL.
        :param label: expected digit for the image at ``url``.
        :param timeout: seconds to wait for the HTTP request.
        :return: ``(image, label)``.
        :raises requests.HTTPError: if the server answers with an error status.
        :raises ValueError: if the downloaded bytes cannot be decoded as an image.
        """
        response = requests.get(url, timeout=timeout)
        # Fail here rather than later inside OpenCV on an error page.
        response.raise_for_status()
        # np.fromstring is deprecated for binary input; frombuffer is the replacement.
        raw = np.frombuffer(response.content, dtype=np.uint8)
        gray = cv2.imdecode(raw, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            raise ValueError(f"could not decode an image from {url!r}")
        gray = cv2.resize(gray, (28, 28))
        _, gray = cv2.threshold(gray, thresh=165, maxval=255, type=cv2.THRESH_BINARY)
        return gray, label


class IMDBDataSets(ABCDatasets):
    """IMDB review dataset together with its label mapping."""

    def __init__(self):
        self.x_train = self.y_train = self.x_test = self.y_test = None
        # Categories requested from load_files(); defined here so that
        # label_mapping also works before load_data() has been called.
        self.classes = ["pos", "neg"]
        # Class names in target-index order. load_files() numbers categories
        # by sorted folder name, so this default matches what it reports;
        # load_data() overwrites it with the loader's actual target_names.
        self.target_names = sorted(self.classes)

    def load_data(self):
        """Download the aclImdb archive and load the train/test splits."""
        dataset = tf.keras.utils.get_file(
            fname="aclImdb.tar.gz",
            origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
            extract=True,
        )
        imdb_dir = os.path.join(os.path.dirname(dataset), "aclImdb")

        train_data = load_files(
            os.path.join(imdb_dir, "train"), shuffle=True, categories=self.classes
        )
        test_data = load_files(
            os.path.join(imdb_dir, "test"), shuffle=False, categories=self.classes
        )
        # Use the loader's own index -> name order instead of assuming that it
        # follows the order of self.classes.
        self.target_names = list(train_data.target_names)

        self.x_train = np.array(train_data.data)
        self.y_train = np.array(train_data.target)
        self.x_test = np.array(test_data.data)
        self.y_test = np.array(test_data.target)
        print(self.x_train[0])
        print(self.y_train[0])
        print(self.x_train.shape)
        print(self.y_train.shape)
        print(self.x_train[0].shape)
        # Output noted by the original author:
        #   the first print shows one review text
        #   1
        #   (25000,)
        #   (25000,)
        #   ()

    @property
    def label_mapping(self):
        """Label mapping from target index to class name."""
        return dict(enumerate(self.target_names))

    @property
    def train_data(self):
        """Training split as ``(x_train, y_train)``."""
        return self.x_train, self.y_train

    @property
    def test_data(self):
        """Test split as ``(x_test, y_test)``."""
        return self.x_test, self.y_test

模型训练与测试

from tools import Model, MnistDataSets  # 这里的包由上面的封装导入
import autokeras as ak


class ImageClassifier(Model):
    """Image classification model."""
    modeler = ak.ImageClassifier(overwrite=True, max_trials=1)

    def post(self, predict_result):
        """Post-process: map each row's arg-max index through ``label_mapping``.

        :param predict_result: iterable of per-image score arrays.
        :return: list with one mapped label per image (``None`` if unmapped).
        """
        return [
            self.label_mapping.get(scores.argmax())
            for scores in predict_result
        ]


class ImageRegressor(Model):
    """
    Image regression model.
    """
    modeler = ak.ImageRegressor(overwrite=True, max_trials=1)

    def post(self, predict_result):
        """Map each regression output to the nearest integer label.

        :param predict_result: iterable of per-image predictions, each a
            single number or a one-element array.
        :return: list with one mapped label per image (``None`` when the
            rounded value is not a key of ``label_mapping``).
        """
        label_predict = []
        for img_predict in predict_result:
            # Unwrap a one-element array explicitly; int() on such an array
            # is deprecated in recent NumPy releases.
            value = img_predict.item() if hasattr(img_predict, "item") else img_predict
            # Round instead of truncating so that e.g. 2.9 is reported as 3.
            label_predict.append(self.label_mapping.get(int(round(value))))
        return label_predict


def train_image_classifier():
    """Train the image classifier on MNIST and save it to mnist_image_classifier.h5."""
    mnist_data = MnistDataSets()
    mnist_data.load_data()

    classifier = ImageClassifier(datasets=mnist_data)
    classifier.train()
    classifier.export_model("mnist_image_classifier.h5")


def test_image_classifier():
    """Check the saved classifier against one image fetched online."""
    mnist_data = MnistDataSets()
    sample, expected = mnist_data.get_online_test_data()

    classifier = ImageClassifier(datasets=mnist_data)
    classifier.load_model("mnist_image_classifier.h5")

    raw_output = classifier.predict(images=[sample])
    predicted = classifier.post(raw_output)[0]

    print("predict_result", raw_output)
    print(expected, predicted, expected == predicted)


def train_image_regressor():
    """Train the image regressor on MNIST and save it to mnist_image_regressor.h5."""
    mnist_data = MnistDataSets()
    mnist_data.load_data()

    regressor = ImageRegressor(datasets=mnist_data)
    regressor.train()
    regressor.export_model("mnist_image_regressor.h5")


def test_image_regressor():
    """Check the saved regressor against one image fetched online."""
    mnist_data = MnistDataSets()
    sample, expected = mnist_data.get_online_test_data()

    regressor = ImageRegressor(datasets=mnist_data)
    regressor.load_model("mnist_image_regressor.h5")

    raw_output = regressor.predict(images=[sample])
    predicted = regressor.post(raw_output)[0]

    print("predict_result", raw_output)
    print(expected, predicted, expected == predicted)


if __name__ == '__main__':
    # Command-line entry point; `fire` is imported only when run as a script.
    import fire

    # Called without arguments. The usage shown in this document selects a
    # function by name, e.g. `python3 model.py train_image_classifier`.
    fire.Fire()

执行结果

  • 训练
[~]# python3 model.py train_image_classifier
Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
image_block_1/b...|vanilla           |?                 
image_block_1/n...|True              |?                 
image_block_1/a...|False             |?                 
image_block_1/c...|3                 |?                 
image_block_1/c...|1                 |?                 
image_block_1/c...|2                 |?                 
image_block_1/c...|True              |?                 
image_block_1/c...|False             |?                 
image_block_1/c...|0.25              |?                 
image_block_1/c...|32                |?                 
image_block_1/c...|64                |?                 
classification_...|flatten           |?                 
classification_...|0.5               |?                 
optimizer         |adam              |?                 
learning_rate     |0.001             |?                 

1500/1500 [==============================] - 76s 50ms/step - loss: 0.1742 - accuracy: 0.9471 - val_loss: 0.0739 - val_accuracy: 0.9791
...

  • 测试
[~]# python3 model.py test_image_classifier
predict_result [[3.1589475e-04 3.8880799e-02 5.0686980e-03 9.2180651e-01 9.0317568e-03
  2.1918179e-02 9.9024124e-05 3.8853439e-05 2.5504678e-03 2.8968096e-04]]
3 3 True

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值