import random
# Representative place names used to fill the sentence templates.
places = ['北京', '上海', '广州', '深圳', '成都', '杭州', '武汉', '南京', '重庆', '西安']
# Representative time expressions used to fill the sentence templates.
times = ['今天', '明天', '后天', '周一', '周二', '周三', '周四', '周五', '周六', '周日']

# Generate 50 natural-language ticket-booking sentences.
sentences = []
for _ in range(50):
    # randint() is inclusive on both ends, so 4 and 5 both select the last
    # template, which is therefore twice as likely as each of the others.
    r = random.randint(0, 5)
    # Draw origin and destination together, without replacement, so that a
    # route never starts and ends in the same city.
    origin, destination = random.sample(places, 2)
    if r == 0:
        sentences.append(f"请给我买一张从{origin}到{destination}的火车票。")
    elif r == 1:
        sentences.append(f"我想去{destination},请帮我定一张火车票。")
    elif r == 2:
        sentences.append(f"我要乘坐明天的G1234次列车去{destination}。")
    elif r == 3:
        sentences.append(f"请问明天从{origin}到{destination}的火车票还有吗?")
    else:
        # The two time expressions are drawn independently and may coincide.
        first_time = random.choice(times)
        second_time = random.choice(times)
        sentences.append(
            f"请给我买一张{first_time}从{origin},{second_time}到{destination}的火车票。"
        )
# Tagging function and the lookup tables it relies on.
# Kept local to this section so tag_word() does not rebuild them on each call.
_PLACE_PREFIXES = ('北京', '上海', '广州', '深圳', '成都', '杭州', '武汉', '南京', '重庆', '西安')
_TIME_WORDS = frozenset(('今天', '明天', '后天', '周一', '周二', '周三', '周四', '周五', '周六', '周日'))


def _bio_pair(label, word):
    """Build the tag pair for *label*.

    The first tag is always ``B-<label>``. The second tag is ``'O'`` when
    *word* is exactly two characters long and ``I-<label>`` otherwise.
    """
    return f'B-{label}', ('O' if len(word) == 2 else f'I-{label}')


def tag_word(word: str):
    """Return a ``(tag1, tag2)`` pair of slot tags for *word*.

    Rules, checked in order:

    1. *word* starts with one of the known place names -> ``Placeofdeparture``.
    2. *word* is exactly one of the known time words -> ``Departuretime``.
    3. *word* is ``'G'`` followed only by digits (a train number)
       -> ``('O', 'O')``.
    4. Anything else -> ``Destination``.

    For rules 1, 2 and 4 the pair is ``('B-<slot>', 'O')`` for a
    two-character word and ``('B-<slot>', 'I-<slot>')`` otherwise.

    NOTE(review): rule 4 is a catch-all, so punctuation, single characters
    and the empty string are all labelled ``Destination``. Presumably the
    two tags are meant for the first and the following characters of the
    word; confirm against the consumer of the output file.
    """
    # str.startswith accepts a tuple of prefixes and tests them in one call.
    if word.startswith(_PLACE_PREFIXES):
        return _bio_pair('Placeofdeparture', word)
    if word in _TIME_WORDS:
        return _bio_pair('Departuretime', word)
    # A bare 'G' does not match: ''.isdigit() is False.
    if word.startswith('G') and word[1:].isdigit():
        return 'O', 'O'
    return _bio_pair('Destination', word)
# Tag every character of every sentence and write the result to disk.
# Each output line is "<char>\t<tag1>\t<tag2>"; a blank line ends a sentence.
# The encoding is pinned to UTF-8 because the sentences are Chinese text and
# the platform default encoding may be unable to represent them.
with open('ziran1234.txt', 'w', encoding='utf-8') as f:
    for sentence in sentences:
        # NOTE(review): iterating a str yields one character at a time, so
        # tag_word() never receives a multi-character place name, time word
        # or train number here and every character gets its fallback tags.
        # TODO: confirm the intended segmentation and output format.
        for char in sentence:
            tag1, tag2 = tag_word(char)
            f.write(f"{char}\t{tag1}\t{tag2}\n")
        f.write('\n')