自然语言处理(NLP) 四:性别识别

import random 
import numpy as np 
import nltk.corpus as nc 
import nltk.classify as cf 
def _suffix_feature(name, n_letters):
    """Return the classifier feature dict for *name*.

    The single feature is the last *n_letters* characters of the name,
    lowercased. Training and prediction must both go through this helper so
    that the feature values seen at prediction time match the ones the model
    was trained on (a suffix that covers the whole name would otherwise keep
    its capital initial and never match a training value).
    """
    return {'feature': name[-n_letters:].lower()}


# Labelled first names read from the NLTK "names" corpus.
male_names = nc.names.words('male.txt')
female_names = nc.names.words('female.txt')

# Train one Naive Bayes model per suffix length (1..5 letters) and record the
# accuracy of each on the held-out half of the data.
models, acs = [], []
for n_letters in range(1, 6):
    # Males first, then females: keeps the pre-shuffle order (and therefore
    # the seeded shuffle result) stable.
    data = [(_suffix_feature(male_name, n_letters), 'male')
            for male_name in male_names]
    data += [(_suffix_feature(female_name, n_letters), 'female')
             for female_name in female_names]
    # Re-seed on every iteration so each suffix length gets the same
    # permutation and hence the same train/test split.
    random.seed(7)
    random.shuffle(data)
    half = len(data) // 2
    train_data = data[:half]
    test_data = data[half:]
    model = cf.NaiveBayesClassifier.train(train_data)
    ac = cf.accuracy(model, test_data)
    models.append(model)
    acs.append(ac)

# Pick the suffix length with the highest held-out accuracy; index i in
# `models`/`acs` corresponds to a suffix length of i + 1.
best_index = int(np.argmax(acs))
best_letters = best_index + 1
print(best_letters)
best_model = models[best_index]
best_ac = acs[best_index]
print(best_letters, '%.2f%%' % round(best_ac * 100, 2))

# Classify a handful of sample names with the best model.
names = ['Leonardo', 'Amy', 'Sam', 'Tom', 'Katherine', 'Taylor', 'Susanne',
         'Watermelon', 'Alpaca', 'Paris', 'Python', 'Java']
print(names)
# Use the same (lowercased) feature extraction as in training.
genders = [best_model.classify(_suffix_feature(name, best_letters))
           for name in names]
print(genders)
  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值