NLTK的简单应用

简单例子

# -*- coding: utf-8 -*-
"""
Examples from the NLTK home page (http://www.nltk.org/).

Tokenizes and POS-tags an English sentence, chunks named entities, and
draws parse trees from the `treebank` and `sinica_treebank` corpora.
"""
import nltk
from nltk.corpus import sinica_treebank
from nltk.corpus import treebank

# Tokenize and tag some text:
sentence = "At eight o'clock on Thursday morning Arthur didn't feel very good."
tokens = nltk.word_tokenize(sentence)
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print(tokens)

tagged = nltk.pos_tag(tokens)
print(tagged[0:6])

# Identify named entities:
entities = nltk.chunk.ne_chunk(tagged)
print(entities)

# Display a parse tree:
t = treebank.parsed_sents('wsj_0001.mrg')[0]
t.draw()

# NLTK Chinese corpus: sinica_treebank
sinica_text = nltk.Text(sinica_treebank.words())
print(sinica_text)

# Print the first eight (word, tag) pairs on one space-separated line.
print(' '.join('%s%s' % (key, var)
               for (key, var) in sinica_treebank.tagged_words()[:8]))

# NLTK Chinese syntax tree
sinica_treebank.parsed_sents()[15].draw()


演示使用NLTK让计算机学习如何通过名字识别性别

# -*- coding: utf-8 -*-
 
"""
演示使用NLTK让计算机学习如何通过名字识别性别。
"""
 
import nltk
 
# Feature extractor used for both training and classification
def gender_features(word):
    """Return the feature dict for a name: its last letter.

    Args:
        word: The name (a string) to extract features from.

    Returns:
        A dict with the single key ``'last_letter'`` mapped to the final
        character of ``word``, or to ``''`` when ``word`` is empty.
    """
    # Slice rather than index so an empty name yields '' instead of
    # raising IndexError; non-empty names give the same result as word[-1].
    return {'last_letter': word[-1:]}
 
# Load the labeled name lists (male / female)
import random
# Import the corpus reader under an alias so that the `names` list built
# below does not overwrite the reader it is built from (which would make
# re-running this snippet in the same session fail).
from nltk.corpus import names as names_corpus
names = ([(name, 'male') for name in names_corpus.words('male.txt')] +
         [(name, 'female') for name in names_corpus.words('female.txt')])
random.shuffle(names)

# Train
f = [(gender_features(n), g) for (n, g) in names]
# Hold out the first 500 shuffled examples; train on the remainder.
# NOTE(review): testset is not used below -- presumably intended for
# evaluation, e.g. nltk.classify.accuracy(c, testset).
trainset, testset = f[500:], f[:500]
c = nltk.NaiveBayesClassifier.train(trainset)

# Test
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print(c.classify(gender_features('Neo')))
print(c.classify(gender_features('Trinity')))



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值