Theano predict

# Excerpted from the Theano LSTM tutorial's lstm.py; it assumes that file's other helpers
# (get_dataset, init_params, load_params, init_tparams, build_model, get_minibatches_idx,
# pred_probs, adadelta) are in scope.
import time

import numpy


def predict(
        dim_proj=128,  # dimensionality of the word embeddings and of the hidden layer (default value; a word embedding maps a word to a vector, not covered in depth here)
        patience=10,  # used for early stopping: stop if the error has not improved for 10 evaluations
        max_epochs=4000,  # maximum number of epochs (one epoch is a full pass over the training set)
        dispFreq=10,  # display progress every 10 updates, i.e. print the training, validation and test errors
        decay_c=0.,  # regularization weight for U, the parameters from the hidden layer ht to the output layer
        lrate=0.0001,  # learning rate for sgd
        n_words=10000,  # vocabulary size, used during preprocessing: each word is replaced by its ID in the dictionary, and words beyond 10000 are mapped to 1; affects the data only, not covered further
        optimizer=adadelta,  # optimization method; the code provides sgd, adadelta and rmsprop, and adadelta is used here
        encoder='lstm',  # an identifier; it can be removed, but if present it must be 'lstm'
        saveto='model/lstm_model.npz',  # file in which to save the best model, together with the training, validation and test errors
        validFreq=370,  # compute the validation error after this number of updates
        saveFreq=1110,  # save the parameters after every saveFreq updates
        maxlen=100,  # maximum sequence length; longer sequences are discarded (see the data-preprocessing section)
        batch_size=16,  # batch size used for training
        valid_batch_size=64,  # batch size used for the validation set
        dataset='imdb',  # preprocessing parameter: the global dict datasets maps the key 'imdb' to two data-handling functions
        noise_std=0.,  # does not appear to be used anywhere later
        use_dropout=True,  # toggles dropout; without it the code runs faster but the results are worse. Dropout randomly drops some of the model's units with a given probability,
        # which amounts to combining several models by a kind of voting; see the deep learning literature for details

        reload_model=None,  # file from which to load model parameters: an already trained model, or a saved intermediate result
        test_size=-1,  # test set size; if positive, only this many test samples are used
):
    # Model options
    # First copy the parameters in the current function's local scope into the dict model_options;
    # many of the functions below take model_options as their argument for parameter passing.
    model_options = locals().copy()  # copies all of the function's arguments into a dict
    print("model options", model_options)

    # Returns the two functions load_data and prepare_data, both defined in imdb.py.
    # The data is already stored in imdb.pkl and is loaded with pickle; the first item is the training data, the second the test data.
    # load_data reads the dataset in, discards sequences longer than maxlen, and splits off a fraction valid_portion of the training set as a validation set.
    # prepare_data converts the data: at training and test time it swaps the two axes of the data and pads it to a uniform length; more on this later.
    load_data, prepare_data = get_dataset(dataset)
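    # For reference, a rough sketch of get_dataset as defined in the tutorial's lstm.py:
    #
    #     datasets = {'imdb': (imdb.load_data, imdb.prepare_data)}
    #
    #     def get_dataset(name):
    #         return datasets[name][0], datasets[name][1]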

    print('Loading data')
    # train format: (set of x, set of y), where each x = [index of word1, index of word2, index of word3, ...]
    train, valid = load_data(n_words=n_words, valid_portion=0.05, maxlen=maxlen)

    # If test_size is set, this step randomly picks test_size examples from the test set as test data;
    # if it is not set, the whole test set is used for testing.
    # The original test data is sorted by length (a property of the imdb data), so it is shuffled here.

    # ydim is the dimensionality of the label y; labels start at 0, hence the +1. It is then added to the model options.
    ydim = numpy.max(train[1]) + 1

    model_options['ydim'] = ydim
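    # e.g. for the binary IMDB sentiment labels {0, 1}: numpy.max(train[1]) == 1, so ydim == 2,
    # which becomes the size of the softmax output layer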

    print('Building model')
    # Model-building phase: first initialize all the parameters by calling the global function init_params()
    params = init_params(model_options)

    if reload_model:
        load_params('lstm_model.npz', params)
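        # note that the tutorial reloads from the fixed filename 'lstm_model.npz' here,
        # not from the path stored in reload_model itself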

    tparams = init_tparams(params)

    # Build the model
    (use_noise, x, mask, y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options)
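    # use_noise is a Theano shared variable toggling dropout noise; f_pred_prob is the compiled
    # function returning the softmax class probabilities, and f_pred returns the argmax class.
    # (At prediction time the tutorial disables dropout first with use_noise.set_value(0.).)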

    start_time = time.time()

    kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
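    # get_minibatches_idx yields (minibatch index, list of example indices) pairs; roughly:
    #
    #     idx_list = numpy.arange(len(train[0]))
    #     numpy.random.shuffle(idx_list)                 # because shuffle=True
    #     minibatches = [idx_list[i:i + batch_size]
    #                    for i in range(0, len(idx_list), batch_size)]
    #     kf = list(enumerate(minibatches))              # 38 examples -> batches of 16, 16, 6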

    for _, train_index in kf:

        # Select the random examples for this minibatch
        y = [train[1][t] for t in train_index]
        x = [train[0][t] for t in train_index]

        x, mask, y = prepare_data(x, y)
        print(x.shape)
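        # prepare_data returns x with shape (longest sequence in this minibatch, n_samples),
        # along with a 0/1 mask of the same shape marking real words vs padding; hence the
        # shapes (82, 16), (83, 16) and (33, 6) in the output below (38 = 16 + 16 + 6)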

        res = pred_probs(f_pred_prob, prepare_data, train, kf, True)
        print("【RES】", res.shape)
        # print(res)

        for r in res:
            print ("%s + %s = %f"%(r[0], r[1], float(r[0])+float(r[1])))

Run output

(82, 16)
16/38 samples classified
32/38 samples classified
38/38 samples classified
【RES】 (38, 2)
0.500000280282 + 0.499999719718 = 1.000000
0.500022365161 + 0.499977634839 = 1.000000
0.499994092235 + 0.500005907765 = 1.000000
0.500028769156 + 0.499971230844 = 1.000000
0.500006347231 + 0.499993652769 = 1.000000
0.49999745113 + 0.50000254887 = 1.000000
0.500004658485 + 0.499995341515 = 1.000000
0.500028498057 + 0.499971501943 = 1.000000
0.500004125404 + 0.499995874596 = 1.000000
0.500034840627 + 0.499965159373 = 1.000000
0.500010874254 + 0.499989125746 = 1.000000
0.500021216939 + 0.499978783061 = 1.000000
0.500021058867 + 0.499978941133 = 1.000000
0.50002712303 + 0.49997287697 = 1.000000
0.500010389075 + 0.499989610925 = 1.000000
0.500021262485 + 0.499978737515 = 1.000000
0.50002967502 + 0.49997032498 = 1.000000
0.500016653088 + 0.499983346912 = 1.000000
0.500026446793 + 0.499973553207 = 1.000000
0.50002089169 + 0.49997910831 = 1.000000
0.5000127288 + 0.4999872712 = 1.000000
0.500019416336 + 0.499980583664 = 1.000000
0.500029114262 + 0.499970885738 = 1.000000
0.500021365958 + 0.499978634042 = 1.000000
0.500013568593 + 0.499986431407 = 1.000000
0.500016869466 + 0.499983130534 = 1.000000
0.500021213203 + 0.499978786797 = 1.000000
0.500023013772 + 0.499976986228 = 1.000000
0.500015465529 + 0.499984534471 = 1.000000
0.500027792589 + 0.499972207411 = 1.000000
0.500019467854 + 0.499980532146 = 1.000000
0.500032826809 + 0.499967173191 = 1.000000
0.500023604551 + 0.499976395449 = 1.000000
0.500019634245 + 0.499980365755 = 1.000000
0.500024585975 + 0.499975414025 = 1.000000
0.500033565917 + 0.499966434083 = 1.000000
0.500028160005 + 0.499971839995 = 1.000000
0.500022054487 + 0.499977945513 = 1.000000
(83, 16)
16/38 samples classified
32/38 samples classified
38/38 samples classified
【RES】 (38, 2)
(... same 38×2 probability table as in the first iteration ...)
(33, 6)
16/38 samples classified
32/38 samples classified
38/38 samples classified
【RES】 (38, 2)
(... same 38×2 probability table as in the first iteration ...)

Notes: there are 38 test samples, processed exactly as at training time; the input format is

[(sample 1, [word indices]), (sample 2, [word indices]), (sample 3, [word indices])]

With mini_batch = 16 the loop runs 3 times, and since pred_probs is called on the full set inside every iteration, the same 38×2 table is printed each time. For each sample the result gives the probabilities of the two classes, which sum to 1.0. Every probability hovers around 0.5, so the model performs very poorly; this is exactly what one would expect here, since reload_model is left at its default of None and the parameters therefore come straight from the random initialization in init_params.
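Turning the returned probabilities into hard labels is just an argmax over the class axis. A minimal sketch, assuming res is the (38, 2) array returned by pred_probs above:

import numpy

preds = numpy.argmax(res, axis=1)    # predicted class per sample, shape (38,)
conf = numpy.max(res, axis=1)        # probability assigned to the chosen class
print(preds[:5], conf[:5])

The tutorial's f_pred performs the same argmax inside the compiled Theano graph.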
