-
原创AI程序写诗:训练
-
先看看写出来什么诗~
训练一分钟,诗词一秒成!
来看看程序
from gensim.models import Word2Vec  # word-vector model
from random import choice
from os.path import exists
from warnings import filterwarnings
filterwarnings('ignore')  # suppress all warnings
class CONF:
    """Default settings for corpus loading, model training and generation."""

    path = '古诗词.txt'  # corpus file, read as UTF-8 text one line at a time
    window = 16  # sliding-window size (training window and generation context)
    min_count = 60  # characters rarer than this are dropped from the vocabulary
    size = 125  # word-vector dimensionality
    topn = 14  # "openness" of generation: how many candidates are requested
    model_path = 'word2vec'  # where the trained model is saved / loaded from
class Model:
    """Character-level poem generator driven by a gensim Word2Vec model.

    NOTE: written against the gensim 3.x API (``size=``, ``wv.index2word``);
    gensim 4 renamed these to ``vector_size`` and ``index_to_key``.
    """

    # Punctuation the generator inserts itself; never sampled as content.
    _PUNCTUATION = (',', '。')

    def __init__(self, window, topn, model):
        """Store the generation settings and the trained model.

        Args:
            window: number of trailing characters used as prediction context.
            topn: minimum number of candidates requested from the model.
            model: trained Word2Vec model.
        """
        self.window = window
        self.topn = topn
        self.model = model  # word-vector model
        self.chr_dict = model.wv.index2word  # vocabulary

    @classmethod
    def initialize(cls, config):
        """Build a Model, loading a saved Word2Vec model or training a new one.

        If ``config.model_path`` exists the model is loaded from it. Otherwise
        the corpus at ``config.path`` is read line by line (each stripped line
        becomes a list of characters), a model is trained on it and saved to
        ``config.model_path``.

        Args:
            config: object exposing ``path``, ``window``, ``min_count``,
                ``size``, ``topn`` and ``model_path``.

        Returns:
            A ready-to-use ``Model`` instance.
        """
        if exists(config.model_path):
            model = Word2Vec.load(config.model_path)
        else:
            with open(config.path, encoding='utf-8') as f:
                ls_of_ls_of_c = [list(line.strip()) for line in f]
            # `size` is passed by keyword: the positional slot right after
            # `sentences` is not `size` in every gensim release.
            model = Word2Vec(ls_of_ls_of_c, size=config.size,
                             window=config.window, min_count=config.min_count)
            model.save(config.model_path)
        return cls(config.window, config.topn, model)

    def _pick(self, scored, used):
        """Randomly choose one candidate character.

        Args:
            scored: list of ``(char, score)`` tuples from the model, or None.
            used: characters that must not be chosen again.

        Returns:
            A character that is neither punctuation nor in ``used``.

        Raises:
            IndexError: if no candidate is left.
        """
        candidates = [pair[0] for pair in scored or []
                      if pair[0] not in self._PUNCTUATION
                      and pair[0] not in used]
        if not candidates:
            raise IndexError('no candidate character left to choose from')
        return choice(candidates)

    def poem_generator(self, title, form):
        """Generate a poem for ``title``.

        Args:
            title: poem title; titles shorter than four characters are padded
                with characters similar to the last one (an empty title is
                seeded with a random vocabulary character).
            form: sequence whose first item is the number of lines and whose
                second item is the number of characters per line.

        Returns:
            ``'《title》'``, a newline, then the poem body; lines end
            alternately with ',' and '。'.

        Raises:
            KeyError: raised by gensim when the last title character is not
                in the vocabulary.
            IndexError: if no candidate character is available.
        """
        n_lines, n_chars = form[0], form[1]

        # Title completion.
        if not title:
            # Seed from the vocabulary, but never with a punctuation mark.
            title = choice([c for c in self.chr_dict
                            if c not in self._PUNCTUATION])
        while len(title) < 4:
            similar = self.model.wv.similar_by_word(
                title[-1], topn=self.topn // 2)
            title += self._pick(similar, title)

        # Text generation: seq holds the title followed by the body so far.
        seq = list(title)
        start = len(seq)
        for i in range(n_lines):
            for _ in range(n_chars):
                # Ask for more candidates than characters already used so
                # that excluding repeats cannot exhaust the list.
                predicted = self.model.predict_output_word(
                    seq[-self.window:], topn=max(self.topn, len(seq) + 1))
                # Only body characters are excluded; title ones may recur.
                seq.append(self._pick(predicted, seq[start:]))
            seq.append(',' if i % 2 == 0 else '。')

        # Slice from the front so a zero-length body cannot swap the parts.
        return '《%s》\n%s' % (title, ''.join(seq[start:]))
def main(config=CONF):
    """Interactive loop: read a title and print one poem per supported form.

    Args:
        config: settings object handed to ``Model.initialize``.

    The loop runs until standard input is exhausted. A failed generation is
    reported and the loop continues with the next title.
    """
    form = {'五言绝句': (4, 5), '七言绝句': (4, 7), '对联': (2, 9)}
    # (form name, ANSI colour code) in output order: red, yellow, cyan.
    palette = (('五言绝句', '031'), ('七言绝句', '033'), ('对联', '036'))
    m = Model.initialize(config)
    while True:
        try:
            title = input('输入标题:').strip()
        except EOFError:
            # End of input (e.g. piped stdin): leave the loop cleanly.
            print()
            return
        try:
            for name, colour in palette:
                poem = m.poem_generator(title, form[name])
                print('\033[%sm%s\033[0m' % (colour, poem))
            print()
        except Exception as err:
            # Top-level boundary: report the failure instead of silently
            # swallowing it, then keep prompting. KeyboardInterrupt is no
            # longer caught, so Ctrl-C can stop a running generation.
            print('生成失败:%s' % (err,))
# Run the interactive generator only when executed as a script.
if __name__ == '__main__':
    main()
语料下载地址
https://gitee.com/arye/dl/tree/master/NLP/gensim%E6%96%87%E6%9C%AC%E7%94%9F%E6%88%90
关于程序的一些基础
猛戳→gensim词向量基础
更多诗词创作
山水田园
宫廷
边塞战争
抒情