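# Generate a Song ci for a given tune name (cipai) from a Song-ci corpus by combining one- and two-character words and matching them against a template.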
import random
import tkinter as tk
import re
from tkinter import messagebox
# NOTE(review): this module-level name shadows the builtin `list` for every
# statement below it; code in this file must not call `list(...)`.
list = []
class Window:
    """Tkinter front end that generates a Song ci for a given tune name.

    The tune name typed into the entry is looked up in ``Ci.txt``; the first
    non-empty line after it is used as the pattern.  Every Chinese character
    of the pattern becomes one slot that is filled with a word drawn at
    random, weighted by its corpus count, and phrases are joined with ``/``.

    Attributes:
        entryCp: ``tk.Entry`` holding the tune name.
        text: ``tk.Text`` that receives the generated results.
        get: ``tk.Button`` that triggers :meth:`songci`.
    """

    # Template slot character -> corpus file that supplies words for it
    # ('1' = single-word corpus, '2' = double-word corpus).
    _WORD_FILES = {'1': 'Ciout1.txt', '2': 'Ciout2.txt'}

    # Interface layout
    def __init__(self, root):
        """Create the widgets inside *root* (the Tk top-level window)."""
        name_label = tk.Label(root, text='输入词牌名')
        name_label.place(x=5, y=5)
        result_label = tk.Label(root, text='宋词生成结果')
        result_label.place(x=5, y=70)

        self.entryCp = tk.Entry(root)
        self.entryCp.place(x=120, y=5)
        self.text = tk.Text(root)
        self.text.place(y=100)

        self.get = tk.Button(root, text='生成宋词', command=self.songci)
        self.get.place(x=120, y=40)
        exit_button = tk.Button(root, text="退出程序", command=root.destroy)
        exit_button.place(x=200, y=40)

    # Generate a ci
    def songci(self):
        """Generate one ci for the entered tune name and append it to the text box.

        The result is also printed to stdout.  Problems (empty or unknown
        tune name, unreadable or malformed corpus files) are reported in a
        message box instead of raising inside the Tk callback.
        """
        # Corpus lines are stripped before matching, so strip the query too.
        name = self.entryCp.get().strip()
        if not name:
            messagebox.showwarning('提示', '请输入词牌名')
            return
        try:
            content = self._generate(name)
        except (OSError, ValueError) as err:
            messagebox.showerror('生成失败', str(err))
            return
        print(content)
        # One result per line so repeated clicks do not run together.
        self.text.insert(tk.END, content + '\n')

    def _generate(self, name):
        """Return the generated ci text for tune *name*.

        Raises:
            OSError: a corpus file cannot be read.
            ValueError: the tune name or a usable pattern/word list is missing.
        """
        pattern = self._find_pattern_line(name)
        template = self._build_template(self._split_phrases(pattern))
        if not template:
            raise ValueError('词牌 "%s" 的模板中没有汉字' % name)
        # Only read the word files whose slot actually occurs in the template.
        pools = {
            slot: self._load_weighted_words(path)
            for slot, path in self._WORD_FILES.items()
            if slot in template
        }
        pieces = []
        for slot in template:
            if slot in pools:
                words, weights = pools[slot]
                pieces.append(random.choices(words, weights=weights)[0])
            else:
                pieces.append('/')
        return ''.join(pieces)

    @staticmethod
    def _find_pattern_line(name):
        """Return the first non-empty line that follows *name* in ``Ci.txt``."""
        # Read into a local on every call: no state survives between clicks.
        with open('Ci.txt', 'r', encoding='utf-8') as handle:
            lines = [raw.strip() for raw in handle]
        try:
            pos = lines.index(name) + 1
        except ValueError:
            raise ValueError('未找到词牌名:%s' % name) from None
        while pos < len(lines) and not lines[pos]:
            pos += 1
        if pos >= len(lines):
            raise ValueError('词牌 "%s" 后没有内容' % name)
        return lines[pos]

    @staticmethod
    def _split_phrases(line):
        """Split *line* at the delimiters and keep only the Chinese characters.

        Phrases that end up empty (for example the piece after a trailing
        delimiter) are dropped so they cannot produce stray slots.
        """
        phrases = []
        for piece in re.split('[,。、]', line):
            hanzi = ''.join(ch for ch in piece if '\u4e00' <= ch <= '\u9fa5')
            if hanzi:
                phrases.append(hanzi)
        return phrases

    @staticmethod
    def _build_template(phrases):
        """Return the slot template: one ``'1'`` per character, ``'/'`` between phrases."""
        return '/'.join('1' * len(phrase) for phrase in phrases)

    @staticmethod
    def _load_weighted_words(path):
        """Read ``(word,count)`` records from *path*.

        Returns:
            A ``(words, weights)`` pair of equal-length sequences suitable for
            ``random.choices``; records with a non-positive count are skipped.

        Raises:
            ValueError: a count is not an integer, or no usable word remains.
        """
        with open(path, 'r', encoding='utf-8') as handle:
            fields = re.split('[(,)]', handle.read())
        words, weights = [], []
        # Fields come in groups of three: text before '(', the word, the count.
        for group in range((len(fields) - 1) // 3):
            count = int(fields[group * 3 + 2])
            if count > 0:
                words.append(fields[group * 3 + 1])
                weights.append(count)
        if not words:
            raise ValueError('语料文件中没有可用的词:%s' % path)
        return words, weights
# Script entry point: build the main window and hand control to the Tk event
# loop.  Guarded so that importing this module does not open a GUI and block.
if __name__ == "__main__":
    root = tk.Tk()
    root.title("宋词自动生成器")
    root.geometry("350x300+500+250")
    window = Window(root)
    root.minsize(550, 250)
    root.mainloop()
# Result: