读入数据
# Read the data with pandas.
import pandas as pd
# header=None: the first line of the file is treated as data, not as column names.
poems_text = pd.read_table('./poems_clean.txt', header=None)
poems_text.columns = ["text"]  # name the single column "text"
# Preview the first rows of the text.
poems_text.head()  # each row has the form "title:content"
结果如下图:
import string #处理字符串
import numpy as np
# Convert every "title:content" row into a list of single characters.
# Each poem is prefixed with the three-character padding 'bbb' so that the
# first real character already has three characters in front of it.
poems_new = []
for line in poems_text['text']:
    # Split on the FIRST ':' only: a colon inside the poem body would
    # otherwise yield more than two parts and make the unpacking raise
    # ValueError. A row without any ':' still raises, as before.
    _title, poem = line.split(':', 1)  # the title is not used afterwards
    poem = poem.replace(' ', '')  # drop the spaces inside the poem
    poem = 'bbb' + poem  # prepend the start-of-poem padding
    poems_new.append(list(poem))  # one list element per character
数据整理
XY =[]
for poem in poems_new:
for i in range(len(poem) - 3): #前三个字是bbb
x1 = poem[i]
x2 = poem[i+1]
x3 = poem[i+2]
y = poem[i+3] #要猜的字
XY.append([x1,</