# 参照贪心科技的视频教程,按照其中的讲解一步一步写出的代码;已经过测试,可以运行,供大家参考学习。
import numpy as np
# Bidirectional lookup tables, filled while scanning the training corpus:
#   tag2id / id2tag   -- part-of-speech tag <-> integer id
#   word2id / id2word -- word <-> integer id
tag2id = {}
id2tag = {}
word2id = {}
id2word = {}
# Extract words and part-of-speech tags from the training corpus.
# Every line is split on '/': the first field is taken as the word and the
# second field (with trailing whitespace/newline removed) as its tag.
# Ids are handed out in order of first appearance, so each "x2id" table and
# its "id2x" counterpart remain exact inverses of one another.
with open('traindata.txt') as corpus:  # 'with' guarantees the file is closed
    for line in corpus:
        items = line.split('/')
        if len(items) < 2:
            # Blank or malformed line without a '/' separator: there is no
            # tag field to read, so skip it instead of raising IndexError.
            continue
        word, tag = items[0], items[1].rstrip()
        if word not in word2id:
            word2id[word] = len(word2id)
            id2word[len(id2word)] = word
        if tag not in tag2id:
            tag2id[tag] = len(tag2id)
            id2tag[len(id2tag)] = tag
# Model dimensions derived from the lookup tables:
#   M -- vocabulary size (number of distinct words)
#   N -- number of distinct part-of-speech tags
M, N = len(word2id), len(tag2id)

# Build the model parameters pi, A and B.
# pi holds one entry per tag (length N), initialised to zero. It is meant to
# hold the probability of appearing in the first position of a sentence; the
# original note said "word", but the array is sized by the number of tags.
pi = np.zeros(N)
A = np.zeros((N, M)) # A[i