BIO
B-begin,I-inside,O-outside
BMES
B-begin,M-middle,E-end,S-single
BIOES
B-begin,I-inside,O-outside,E-end,S-single
以下转换默认输入文件已经预处理好(例如没有多余的空行)。代码只做了简单的测试,如果发现 bug,请自行修改。我用的是改版的 YEDDA 标注工具,可以直接导出 BMES 标注的 anns 文件,所以下面的脚本都以 BMES 文件作为输入。
BMES 2 BIOES
import os


def bmes_line_to_bioes(line):
    """Convert one whitespace-separated BMES-annotated line to BIOES.

    The first column is the token; every following column is treated as a
    tag.  A tag whose first character is ``M`` gets that character replaced
    by ``I``; all other tags (B, E, S, O, ...) are already valid BIOES and
    are kept unchanged.  Columns are re-joined with a single space.

    A blank or whitespace-only line (sentence separator) yields ``''`` so
    that joining the results with ``'\n'`` keeps exactly one empty line.
    """
    fields = line.split()
    if not fields:
        return ''
    token, tags = fields[0], fields[1:]
    converted = ['I' + tag[1:] if tag.startswith('M') else tag for tag in tags]
    return ' '.join([token] + converted)


def bmes_to_bioes_text(lines):
    """Convert an iterable of BMES lines into the full BIOES file content.

    Lines are joined with ``'\n'`` rather than each being written with a
    trailing newline, so the result does not end with an extra empty line.
    """
    return '\n'.join(bmes_line_to_bioes(line) for line in lines)


def convert_bmes_file_to_bioes(src_path, dst_path):
    """Read the UTF-8 BMES file *src_path* and write BIOES to *dst_path*."""
    # Read everything first so the output is not truncated if reading fails.
    with open(src_path, 'r', encoding='utf-8') as src:
        converted = bmes_to_bioes_text(src)
    with open(dst_path, 'w', encoding='utf-8') as dst:
        dst.write(converted)


if __name__ == '__main__':
    cur_dir = os.getcwd()
    # os.path.join instead of a hard-coded '\\' so this also runs off Windows.
    convert_bmes_file_to_bioes(
        os.path.join(cur_dir, '1_BMES.txt'),
        os.path.join(cur_dir, '1_BIOES.txt'),
    )
BMES 2 BIO
import os

# BMES prefix -> BIO prefix.  Prefixes not listed here (B, O, ...) are kept.
_BMES_TO_BIO_PREFIX = {'M': 'I', 'E': 'I', 'S': 'B'}


def bmes_line_to_bio(line):
    """Convert one whitespace-separated BMES-annotated line to BIO.

    The first column is the token; every following column is treated as a
    tag and converted by its first character: ``M`` and ``E`` become ``I``,
    ``S`` becomes ``B``, anything else is kept unchanged.  Columns are
    re-joined with a single space.

    A line with only a token is returned without its trailing newline, and
    a blank or whitespace-only line (sentence separator) yields ``''``, so
    that joining the results with ``'\n'`` never doubles line breaks.
    """
    fields = line.split()
    if not fields:
        return ''
    token, tags = fields[0], fields[1:]
    converted = [
        _BMES_TO_BIO_PREFIX.get(tag[0], tag[0]) + tag[1:] for tag in tags
    ]
    return ' '.join([token] + converted)


def bmes_to_bio_text(lines):
    """Convert an iterable of BMES lines into the full BIO file content.

    Lines are joined with ``'\n'`` rather than each being written with a
    trailing newline, so the result does not end with an extra empty line.
    """
    return '\n'.join(bmes_line_to_bio(line) for line in lines)


def convert_bmes_file_to_bio(src_path, dst_path):
    """Read the UTF-8 BMES file *src_path* and write BIO to *dst_path*."""
    # Read everything first so the output is not truncated if reading fails.
    with open(src_path, 'r', encoding='utf-8') as src:
        converted = bmes_to_bio_text(src)
    with open(dst_path, 'w', encoding='utf-8') as dst:
        dst.write(converted)


if __name__ == '__main__':
    cur_dir = os.getcwd()
    # os.path.join instead of a hard-coded '\\' so this also runs off Windows.
    convert_bmes_file_to_bio(
        os.path.join(cur_dir, '1_BMES.txt'),
        os.path.join(cur_dir, '1_BIO.txt'),
    )