# Load the name/gender dataset: names are collected in train_x and the
# matching one-hot gender labels in train_y (same order, same length).
name_dataset = './name.csv'
train_x = []
train_y = []
with open(name_dataset, 'r', encoding='UTF-8') as f:
    # The first line is always skipped (presumably a CSV header row).
    next(f, None)
    for row in f:
        fields = row.strip().split(',')
        # Only lines with exactly two comma-separated fields are used;
        # anything else (blank lines, extra commas) is ignored.
        if len(fields) != 2:
            continue
        name, gender = fields
        train_x.append(name)
        # One-hot label: [0, 1] for '男' (male); every other value is
        # labelled [1, 0] (female).
        train_y.append([0, 1] if gender == '男' else [1, 0])
# Fixed name length used by the script. It is hard-coded rather than derived
# from the data, so building it never fails on an empty train_x.
# NOTE(review): names longer than 8 characters are not handled here --
# confirm that downstream code truncates or pads accordingly.
max_name_length = 8

# Number of names that contribute to the vocabulary.
counter = len(train_x)

# Character-frequency table: maps each character seen in any name to the
# number of times it occurs across all names (keys in first-seen order).
vocabulary = {}
for name in train_x:
    # A str iterates character by character, so no intermediate list is needed.
    for word in name:
        vocabulary[word] = vocabulary.get(word, 0) + 1