from __future__ import unicode_literals, print_function, division

import glob
import os
import unicodedata
from io import open

import torch
def findFiles(path):
    """Return the list of paths matching the glob pattern ``path``.

    Thin wrapper around ``glob.glob``; an unmatched pattern yields ``[]``.
    """
    return glob.glob(path)


# print(findFiles('data/names/*.txt'))
import string
# Alphabet of characters kept after normalisation: ASCII letters plus a
# few punctuation marks that occur in names.
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)
print(all_letters, n_letters)


# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Return ``s`` reduced to the characters found in ``all_letters``.

    The string is NFD-normalised so accented letters decompose into a base
    letter plus combining marks; the combining marks (Unicode category
    ``'Mn'``) are dropped, and any remaining character that is not in
    ``all_letters`` is dropped as well.
    """
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn' and c in all_letters
    )


print(unicodeToAscii('Ślusàrski'))
# Data preprocessing (convert each name, character by character, into a tensor)
# Build the category_lines dictionary, a list of names per language
category_lines = dict()  # dictionary; starts out empty
all_categories = []


# Read a file and split into lines
def readLines(filename):
    """Read ``filename`` as UTF-8 and return its lines passed through unicodeToAscii.

    The whole file content is stripped of leading/trailing whitespace before
    being split on ``'\\n'``, so a trailing newline does not yield an empty
    last entry.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(filename, encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
    # print([unicodeToAscii(line) for line in lines])
    return [unicodeToAscii(line) for line in lines]


for filename in findFiles('data/names/*.txt'):
    # The category is the file's base name without its extension.
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLin