Rule-Embedding-Based Paper Comparison System (16) - Comparing the Model with Rule Embedding and the Model without Rule Embedding


To compare the model with rule embedding against a model without it, we built a second, rule-free model.
It is simply the original rule-embedded model with the input for the rule-embedding part removed.
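
For orientation, the sketch below (not the series' exact code) contrasts the two variants: the rule-embedded model feeds an extra rule-feature input into the classifier, while the no-rule model built in this post drops that input and classifies from the two pooled text vectors alone. The 4-dimensional rule vector, the name rule_features, and the 1024-dimensional stand-in for the pooled BiLSTM features are assumptions, chosen only to match the rulesEmbbeding() output and the concatenation used in the full code below.

# Hedged sketch of the structural difference between the two models.
# The 4-dim rule vector and the 1024-dim pooled-text stand-in are assumptions;
# the real models build the text features with the BiLSTM + max-pooling shown below.
from keras.layers import Input, Dense, concatenate
from keras.models import Model

text_features = Input(shape=(1024,), name="pooled_text_features")  # stand-in for [z1_MaxPool, z2_MaxPool]

# Rule-embedded variant: a small rule vector is concatenated with the text features.
rule_features = Input(shape=(4,), name="rule_features")             # hypothetical rule input
with_rules = Dense(2, activation='softmax')(
    Dense(6, activation='relu')(concatenate([text_features, rule_features])))
model_with_rules = Model(inputs=[text_features, rule_features], outputs=with_rules)

# No-rule variant (this post): classify from the text features alone.
no_rules = Dense(2, activation='softmax')(Dense(6, activation='relu')(text_features))
model_no_rules = Model(inputs=text_features, outputs=no_rules)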

Model

Code for the model without rule embedding

import numpy as np
from keras import backend as K
from keras import optimizers
from keras.layers import Input, Embedding, LSTM, Dense, Dropout, Lambda, concatenate
from keras.models import Model, load_model
from keras.callbacks import ModelCheckpoint


class MyModel_noRule():
    def __init__(self, batch_size=None, num_epochs=None, word_index=None, embedword_matrix=None, subId=None,
                 index_pad_array_first=None, index_pad_array_second=None, y=None):
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.word_index = word_index
        self.embedword_matrix = embedword_matrix
        self.subId = subId
        self.index_pad_array_first = index_pad_array_first
        self.index_pad_array_second = index_pad_array_second
        self.y = y
        self.model = None

    def buildmodel(self):
        print('building model...')
        # Shared embedding layer, initialized with the pre-trained word vectors.
        embedding_layer = Embedding(len(self.word_index) + 1,
                                    256,
                                    weights=[self.embedword_matrix],
                                    input_length=150, trainable=True)
        sequence_input1 = Input(shape=(150,), name="first_paper")
        sequence_input2 = Input(shape=(150,), name="second_paper")
        embedded_sequences1 = embedding_layer(sequence_input1)
        embedded_sequences2 = embedding_layer(sequence_input2)
        # Bidirectional LSTM over the first paper: forward and backward passes, concatenated.
        LSTM_Left1 = LSTM(512, implementation=2, return_sequences=True, go_backwards=False)(embedded_sequences1)
        LSTM_Right1 = LSTM(512, implementation=2, return_sequences=True, go_backwards=True)(embedded_sequences1)
        concat1 = concatenate([LSTM_Left1, LSTM_Right1], axis=-1)
        # Bidirectional LSTM over the second paper.
        LSTM_Left2 = LSTM(512, implementation=2, return_sequences=True, go_backwards=False)(embedded_sequences2)
        LSTM_Right2 = LSTM(512, implementation=2, return_sequences=True, go_backwards=True)(embedded_sequences2)
        concat2 = concatenate([LSTM_Left2, LSTM_Right2], axis=-1)
        z1 = Dense(512, activation='tanh')(concat1)
        z2 = Dense(512, activation='tanh')(concat2)
        # Max-pool over the time dimension to get one 512-d vector per paper.
        z1_MaxPool = Lambda(lambda x: K.max(x, axis=1), output_shape=(512,))(z1)
        z2_MaxPool = Lambda(lambda x: K.max(x, axis=1), output_shape=(512,))(z2)
        # Unlike the rule-embedded model, only the two text vectors are concatenated here.
        concat = concatenate([z1_MaxPool, z2_MaxPool], axis=-1)
        model_final = Dense(6, activation='relu')(concat)
        model_final = Dropout(0.5)(model_final)
        model_final = Dense(2, activation='softmax')(model_final)
        self.model = Model(inputs=[sequence_input1, sequence_input2],
                           outputs=model_final)
        adam = optimizers.Adam(lr=0.0001)
        self.model.compile(loss='binary_crossentropy',
                           optimizer=adam,
                           metrics=['accuracy'])
        print(self.model.summary())

    def trainmodel(self):
        self.buildmodel()
        # Save a checkpoint after every epoch.
        checkpointer = ModelCheckpoint(filepath="model_/" + str(self.subId) + "_model-{epoch:02d}.hdf5", period=1)
        y_train = np.asarray(self.y).astype('float32')
        self.model.fit([self.index_pad_array_first, self.index_pad_array_second], y_train,
                       batch_size=self.batch_size, epochs=self.num_epochs, verbose=1,
                       callbacks=[checkpointer])
        self.save_model()

    def predmodel(self, modelname, index_pad_array_first, index_pad_array_second):
        self.model = load_model(modelname)
        predlabel = self.model.predict([index_pad_array_first, index_pad_array_second],
                                       batch_size=512, verbose=1)
        return predlabel

    def save_model(self):
        self.model.save("model_/model" + str(self.subId) + '.h5')
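
Training could then be launched roughly as follows. This is only a usage sketch: the preprocessing objects (word_index, embedword_matrix, the padded index arrays, the labels) are assumed to come from the earlier posts in this series, and the batch size and epoch count are placeholder values.

# Hedged usage sketch; all inputs are assumed to be produced by the earlier posts.
trainer = MyModel_noRule(batch_size=64, num_epochs=10,
                         word_index=word_index,
                         embedword_matrix=embedword_matrix,
                         subId=subId,
                         index_pad_array_first=index_pad_array_first,
                         index_pad_array_second=index_pad_array_second,
                         y=labels)
trainer.trainmodel()  # builds the model, checkpoints every epoch, then saves model_/model<subId>.h5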

Where the trained models are saved

[Screenshot: the saved model files under the model_/ directory]

Prediction

Prediction function code

import itertools
import numpy as np
from tqdm import tqdm
from keras.models import load_model

# test2sequence() and rulesEmbbeding() come from earlier posts in this series.
def predmodel_norules(modelname, PaperIdList, dicti, dictAlli, subId):
    # Build every unordered pair of paper ids, then turn each pair into padded index sequences.
    AllPaperPairs = list(itertools.combinations(PaperIdList, 2))
    first_list = []
    second_list = []
    AllPaperPairs_len = len(AllPaperPairs)
    for i in tqdm(range(AllPaperPairs_len)):
        pad_array_first, pad_array_second = test2sequence(AllPaperPairs[i][0], AllPaperPairs[i][1], dicti)
        first_list.append(pad_array_first.tolist()[0])
        second_list.append(pad_array_second.tolist()[0])
    index_pad_array_first = np.array(first_list)
    index_pad_array_second = np.array(second_list)
    # Load the trained no-rule model and score all pairs in one batched prediction run.
    model = load_model(modelname)
    predlabel = model.predict([index_pad_array_first, index_pad_array_second],
                              batch_size=512, verbose=1)
    predlabel_list = predlabel.tolist()
    finalresult = []
    for i in tqdm(range(len(AllPaperPairs))):
        # Keep a pair only when its first softmax output exceeds 0.5; the rule features
        # are still computed here, but only so they can be shown alongside the result.
        if predlabel_list[i][0] > 0.5:
            temp = AllPaperPairs[i]
            rules = rulesEmbbeding(temp[0], temp[1], subId)
            finalresult.append([temp, (dictAlli[temp[0]], dictAlli[temp[1]]),
                                ("reference", rules[0]), ("keywords", rules[1]),
                                ("ccs", rules[2]), ("Text Keywords", rules[3])])
    return finalresult
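
A call to the function might look roughly like this. The argument names are placeholders for objects built in the earlier posts, and the model file name simply follows the save path used by save_model() above.

# Hedged usage sketch; PaperIdList, dicti, dictAlli and subId are assumed to
# come from the data-preparation steps of the earlier posts.
results = predmodel_norules("model_/model" + str(subId) + ".h5",
                            PaperIdList, dicti, dictAlli, subId)
for pair, titles, ref, kw, ccs, text_kw in results:
    print(pair, titles, ref, kw, ccs, text_kw)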

Prediction output

[Screenshots: sample prediction output]
