1. 添加代码至word_sequence.py
def transform(self, sentence, max_len=None):
    """Encode a sequence of tokens as a NumPy array of indices.

    Each token is mapped through ``self.to_index``. When ``max_len`` is
    given, the result has exactly ``max_len`` entries: shorter input is
    padded with ``self.PAD`` and longer input is truncated. Without
    ``max_len`` the result has one entry per token.

    Args:
        sentence: Sized iterable of tokens to encode.
        max_len: Optional fixed output length.

    Returns:
        ``np.ndarray`` holding the encoded indices.

    Raises:
        AssertionError: If ``self.fitted`` is falsy.
    """
    assert self.fitted, "WordSequence尚未进行fit操作"

    size = len(sentence) if max_len is None else max_len
    # Start from an all-padding buffer, then overwrite the leading slots.
    encoded = [self.PAD] * size
    limit = len(encoded)

    for pos, token in enumerate(sentence):
        if max_len is not None and pos >= limit:
            # Buffer is full: the remaining tokens are dropped.
            break
        encoded[pos] = self.to_index(token)

    return np.array(encoded)
def inverse_transform(self, indices, ignore_pad=False, ignore_unk=False, ignore_start=False,
                      ignore_end=False):
    """Decode a sequence of indices back into a list of words.

    Each index is mapped through ``self.to_word``. A decoded word is left
    out of the result when it equals one of the ``WordSequence`` tag
    constants and the matching ``ignore_*`` flag is truthy.

    Args:
        indices: Iterable of indices to decode.
        ignore_pad: Drop words equal to ``WordSequence.PAD_TAG``.
        ignore_unk: Drop words equal to ``WordSequence.UNK_TAG``.
        ignore_start: Drop words equal to ``WordSequence.START_TAG``.
        ignore_end: Drop words equal to ``WordSequence.END_TAG``.

    Returns:
        List of decoded words in input order, minus the dropped tags.
    """
    words = []
    for idx in indices:
        token = self.to_word(idx)
        # Short-circuit chain: tags are checked in PAD, UNK, START, END order.
        suppressed = (
            (token == WordSequence.PAD_TAG and ignore_pad)
            or (token == WordSequence.UNK_TAG and ignore_unk)
            or (token == WordSequence.START_TAG and ignore_start)
            or (token == WordSequence.END_TAG and ignore_end)
        )
        if not suppressed:
            words.append(token)
    return words
def test():
    """Fit a WordSequence on two tiny sentences and print a round trip.

    Prints the encoded form of a three-character sentence, then the
    result of decoding those indices again.
    """
    corpus = [
        ['你', '好', '啊'],
        ['你', '好', '哦'],
    ]
    sequence = WordSequence()
    sequence.fit(corpus)

    encoded = sequence.transform(['我', '们', '好'])
    print(encoded)

    decoded = sequence.inverse_transform(encoded)
    print(decoded)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()
2. 运行word_sequence.py