自然语言,图像处理

情感分析

import nltk.corpus as nc
import nltk.classify as cf
import nltk.classify.util as cu


def _bag_of_words(words):
    """Return a bag-of-words feature dict: each word maps to True."""
    return {word: True for word in words}


# Build labeled feature sets from the NLTK movie_reviews corpus.
pdata = []
for fileid in nc.movie_reviews.fileids("pos"):
    pdata.append((_bag_of_words(nc.movie_reviews.words(fileid)), "POSITIVE"))
ndata = []
for fileid in nc.movie_reviews.fileids("neg"):
    # Label typo fixed: was "NEGETIVE".
    ndata.append((_bag_of_words(nc.movie_reviews.words(fileid)), "NEGATIVE"))

# 80/20 train/test split taken within each class so both labels are
# represented in both splits.
pnum, nnum = int(0.8 * len(pdata)), int(0.8 * len(ndata))
train_data = pdata[:pnum] + ndata[:nnum]
test_data = pdata[pnum:] + ndata[nnum:]

model = cf.NaiveBayesClassifier.train(train_data)
ac = cu.accuracy(model, test_data)
print("%.2f%%" % round(ac * 100, 2))

# Most informative features: the words that best separate the two labels.
tops = model.most_informative_features()
for top in tops:
    print(top[0])

# Classify a few hand-written reviews with the trained model.
reviews = [
"It is a amazing movie.",
"This is a dull movie. I would never recommend it to anyone",
"The cinematography is pretty great in this movie.",
"This direction was  terrible and the story was all over the place."
]
sents, probs = [], []
for review in reviews:
    # Same bag-of-words featurization as training (naive whitespace split).
    feature = {word: True for word in review.split()}
    pcls = model.prob_classify(feature)
    sent = pcls.max()            # most probable label
    sents.append(sent)
    probs.append(pcls.prob(sent))
# BUG FIX: the loop variable was named `probs`, which shadowed the list
# while the print statement used the stale `prob` left over from the
# previous loop — every review printed the same (last) probability.
for review, sent, prob in zip(reviews, sents, probs):
    print(review, "->", sent, "%.2f%%" % round(prob * 100, 2))
  

性别识别

import random
import numpy as np
import nltk.corpus as nc
import nltk.classify as cf

# Gender classification from name suffixes: try suffix lengths 1..5 and
# keep the model with the best held-out accuracy.
male_names = nc.names.words("male.txt")
female_names = nc.names.words("female.txt")

models, acs = [], []
for n_letter in range(1, 6):
    # Feature: the last n_letter characters of the name, lower-cased.
    data = [({"feature": nm[-n_letter:].lower()}, "male") for nm in male_names]
    data += [({"feature": nm[-n_letter:].lower()}, "female") for nm in female_names]
    random.seed(7)  # identical shuffle for every suffix length
    random.shuffle(data)
    half = int(len(data) / 2)
    train_data, test_data = data[:half], data[half:]
    model = cf.NaiveBayesClassifier.train(train_data)
    acs.append(cf.accuracy(model, test_data))
    models.append(model)

# Pick the suffix length whose model scored highest on the test half.
best_index = np.array(acs).argmax()
best_letter = best_index + 1
names = ["Leonardo","Amy","Sam","Tom","Katherine","Tayior",
         "Susanne","Watermelon","Alpaca","Paris","Python","JAVA"]
print(names)
genders = []
for name in names:
    suffix_feature = {"feature": name[-best_letter:].lower()}
    genders.append(models[best_index].classify(suffix_feature))
print(genders)

文本分类

import sklearn.datasets as sd
import sklearn.feature_extraction.text as ft
import sklearn.naive_bayes as nb

# Map 20newsgroups category ids to human-readable display labels.
# Typos fixed: "MOTORCYLES" -> "MOTORCYCLES", "SPAVE" -> "SPACE".
cld = {"misc.forsale": "SALES",
       "rec.motorcycles": "MOTORCYCLES",
       "rec.sport.baseball": "BASEBALL",
       "sci.crypt": "CRYPTOGRAPHY",
       "sci.space": "SPACE"}
# Download/load the training split restricted to these five categories;
# random_state fixes the shuffle order for reproducibility.
train = sd.fetch_20newsgroups(subset="train",
                              categories=cld.keys(), random_state=7,
                              shuffle=True)
train_data = train.data

train_y = train.target
cat
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值