def main(p2hzfile, profile):
    """Run the interactive pinyin-to-hanzi conversion loop.

    Loads the pinyin table from ``p2hzfile`` and the n-gram scores from
    ``profile``, then repeatedly reads a line from standard input, converts
    it with ``IME`` and prints the result. Entering ``q`` ends the loop.
    """
    pinyin_table = {}
    ngram_scores = {}
    pyinit(pinyin_table, p2hzfile)
    ngraminit(ngram_scores, profile)

    while True:
        typed = input("请输入")
        if typed == 'q':
            return
        print(IME(typed, pinyin_table, ngram_scores))
def pyinit(p2hz, file):
    """Load the pinyin-to-hanzi table from ``file`` into ``p2hz``.

    Each line is stripped and split on single spaces: the first field is the
    pinyin key and every following field is appended to ``p2hz[key]``.
    Lines that carry no candidate after the key add nothing.

    After loading, the sentinel entries ``p2hz['E']`` and ``p2hz['B']`` are
    (re)set to ``['E']`` and ``['B']``.

    Args:
        p2hz: dict that is filled in place.
        file: path of the table file. It is opened with the platform default
            text encoding.
    """
    # The path comes from the ``file`` argument (the previous code opened a
    # file literally named "file"); ``with`` closes the handle even on error.
    with open(file) as f:
        for line in f:
            fields = line.strip().split(' ')
            if len(fields) < 2:
                continue  # blank line or key without candidates
            key = fields[0]
            if not p2hz.get(key):
                p2hz[key] = []
            p2hz[key].extend(fields[1:])

    # Sentinel entries, each mapping to itself.
    p2hz['E'] = ['E']
    p2hz['B'] = ['B']
def ngraminit(ngramprob, file):
    """Load n-gram scores from ``file`` into ``ngramprob``.

    Each non-blank line is stripped and split on single spaces; the first
    field is the key and the second field is parsed with ``float`` and stored
    as its score. Blank lines are skipped.

    Args:
        ngramprob: dict that is filled in place.
        file: path of the score file. It is opened with the platform default
            text encoding.

    Raises:
        IndexError: a non-blank line has no second field.
        ValueError: the second field is not a valid float.
    """
    # The path comes from the ``file`` argument (the previous code opened a
    # file literally named 'file'); ``with`` closes the handle even on error.
    with open(file) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines instead of raising IndexError
            fields = stripped.split(' ')
            ngramprob[fields[0]] = float(fields[1])
def IME(pinyin, p2hz, prob):
    """Convert a pinyin string into a hanzi string.

    Builds a candidate lattice for ``pinyin`` using ``p2hz``, scores it with
    ``prob`` and reads the result back from the scored lattice.

    Returns:
        The string produced by ``BackLattice``, or ``None`` when
        ``BuildLattice`` reports 0.
    """
    lattice = []
    status = BuildLattice(pinyin, lattice, p2hz)
    if status == 0:
        return None
    SearchLattice(lattice, prob)
    return BackLattice(lattice)
def BuildLattice(py, shuju, p2hz):
    """Append one candidate column per syllable of ``py`` to ``shuju``.

    The syllables are the whitespace-separated tokens of ``py`` with the
    sentinels ``'B'`` and ``'E'`` added at the front and back. For every
    syllable a column is appended to ``shuju``; each cell of a column is a
    fresh list ``[candidate, -100, 0]`` (text, initial score, initial
    back-pointer index).

    Args:
        py: pinyin input, syllables separated by whitespace.
        shuju: list that receives the columns (modified in place).
        p2hz: mapping from syllable to its list of candidates.

    Returns:
        1 when every column has at least one candidate, 0 when some syllable
        (or a sentinel) has none. The lattice is still fully built in that
        case so the contents of ``shuju`` do not depend on the return value.
    """
    # split() without a separator ignores leading/trailing/repeated spaces,
    # which would otherwise produce empty tokens with no candidates.
    syllables = ['B'] + py.split() + ['E']

    complete = 1
    for syllable in syllables:
        # A missing key and an empty candidate list both give an empty column.
        candidates = p2hz.get(syllable) or []
        if not candidates:
            # An empty column cannot be traced back through; report failure
            # so the caller can bail out instead of hitting an IndexError.
            complete = 0
        shuju.append([[candidate, -100, 0] for candidate in candidates])
    return complete
def getzis(zi, p2hz, zis):
    """Append every candidate stored under ``zi`` in ``p2hz`` to ``zis``.

    Nothing is appended when ``zi`` is missing or its entry is empty.
    """
    zis.extend(p2hz.get(zi) or ())
def SearchLattice(shuju, probfile):
    """Fill in the best score and back-pointer of every lattice cell.

    ``shuju`` is a list of columns whose cells are mutable
    ``[text, score, back]`` lists. Columns are processed left to right
    starting at index 1. For each cell, every cell of the previous column is
    tried as predecessor: the candidate score is the predecessor's score plus
    ``getprob`` of the lookup key. The key is the cell's own text when the
    predecessor is in column 0, otherwise the predecessor's text followed by
    the cell's text. The highest score is stored in ``cell[1]`` and the index
    of the predecessor that produced it in ``cell[2]``; on ties the first
    predecessor wins.

    Args:
        shuju: the lattice, modified in place.
        probfile: mapping from lookup key to score, passed to ``getprob``.
    """
    for i in range(1, len(shuju)):
        previous = shuju[i - 1]
        for cell in shuju[i]:
            # ``None`` means "no predecessor seen yet". A fixed numeric floor
            # would wrongly reject every predecessor once accumulated scores
            # drop below it, leaving the back-pointer stuck at 0.
            best = None
            for k, prev_cell in enumerate(previous):
                if i - 1 > 0:
                    key = prev_cell[0] + cell[0]
                else:
                    # Column 0 only holds the start sentinel, so the cell's
                    # own text is looked up on its own.
                    key = cell[0]
                score = getprob(key, probfile) + prev_cell[1]
                if best is None or score > best:
                    best = score
                    cell[2] = k
            # With an empty previous column the back-pointer is left as is
            # and the score falls back to -1000.
            cell[1] = -1000 if best is None else best
def getprob(zi, prob):
    """Return the score stored for ``zi`` in ``prob``.

    Args:
        zi: lookup key.
        prob: mapping from key to score.

    Returns:
        The stored score, or ``-100.0`` when ``zi`` is absent (or stored as
        ``None``).
    """
    score = prob.get(zi)
    # Compare against None rather than testing truthiness: a stored score of
    # 0.0 is a valid value and must not be replaced by the fallback.
    if score is None:
        return -100.0
    return score
def BackLattice(shuju):
    """Read the result string back from a scored lattice.

    Starts at the last cell of the last column and follows each cell's
    back-pointer (``cell[2]``) into the previous column until column 0 is
    reached. The text of every visited cell from column 1 onwards is
    collected, except cells whose text is ``'E'``, and the pieces are joined
    in left-to-right order.
    """
    column = len(shuju) - 1
    cell = shuju[column][-1]

    collected = []  # gathered right-to-left, reversed when joining
    for column in range(column, 0, -1):
        text = cell[0]
        if text != 'E':
            collected.append(text)
        cell = shuju[column - 1][cell[2]]
    return "".join(reversed(collected))
# 花了一个半小时吧，还是不太熟练。