# Full code below. name.csv is not provided; you need to collect the data yourself.
import tensorflow as tf
# Path of the CSV file that is read below.
name_dataset = './name.csv'

# Parallel lists: train_x[i] is the first CSV field of a row and
# train_y[i] is the two-element label derived from its second field.
train_x = []
train_y = []
with open(name_dataset, 'r', encoding='UTF-8') as f:
    # The first line is always skipped (presumably a header row).
    next(f, None)
    for line in f:
        sample = line.strip().split(',')
        # Rows that do not split into exactly two fields are ignored.
        if len(sample) != 2:
            continue
        train_x.append(sample[0])
        # '男' (male) maps to [0, 1]; every other value maps to [1, 0],
        # which the original author labels as female.
        train_y.append([0, 1] if sample[1] == '男' else [1, 0])
# Length of the longest entry in train_x (raises ValueError if train_x is empty).
max_name_length = max(len(name) for name in train_x)
# The measured value can be inspected with:
#   print("longest name length: ", max_name_length)
# NOTE(review): the measured length is immediately replaced by a fixed 8,
# presumably the fixed name width used further down; confirm.
max_name_length = 8
counter = 0
vocabulary = {}
for name in train_x:
counter += 1
tokens = [word for word in name]
for word in tokens:
if word in vocabulary:
vo