自然语言，图像处理

最新推荐文章于 2024-05-20 22:15:20 发布

weixin_43847832

最新推荐文章于 2024-05-20 22:15:20 发布

阅读量347

点赞数

本文链接：https://blog.csdn.net/weixin_43847832/article/details/89390183

版权

情感分析

import nltk.corpus as nc
import nltk.classify as cf
import nltk.classify.util as cu


# Build bag-of-words samples from the NLTK movie_reviews corpus. Each review
# becomes a ({word: True, ...}, label) pair: the presence of a word is the
# only signal, word counts are deliberately ignored.
fileids = nc.movie_reviews.fileids("pos")
pdata = [
    ({word: True for word in nc.movie_reviews.words(fileid)}, "POSITIVE")
    for fileid in fileids
]
nfileids = nc.movie_reviews.fileids("neg")
ndata = [
    # Label spelling corrected (was "NEGETIVE"); it is printed to the user.
    ({word: True for word in nc.movie_reviews.words(fileid)}, "NEGATIVE")
    for fileid in nfileids
]

# The first 80% of each class is used for training, the remaining 20% for
# testing, so both classes are represented in both sets.
pnum, nnum = int(0.8 * len(pdata)), int(0.8 * len(ndata))
train_data = pdata[:pnum] + ndata[:nnum]
test_data = pdata[pnum:] + ndata[nnum:]

# Train the Naive Bayes model and report accuracy on the held-out samples
# as a percentage with two decimals.
model = cf.NaiveBayesClassifier.train(train_data)
ac = cu.accuracy(model, test_data)
print("%.2f%%" % round(ac * 100, 2))

# Keywords: print the name of each feature the trained classifier ranks as
# most informative (the paired feature value is not shown).
tops = model.most_informative_features()
for keyword, _ in tops:
    print(keyword)

# Classify a few hand-written reviews and print, for each one, the predicted
# label together with the probability the classifier assigns to that label.
reviews = [
    "It is a amazing movie.",
    "This is a dull movie. I would never recommend it to anyone",
    "The cinematography is pretty great in this movie.",
    "This direction was  terrible and the story was all over the place."
]
sents, probs = [], []
for review in reviews:
    # One {word: True} entry per whitespace-separated token.
    # NOTE(review): split() keeps case and attached punctuation ("movie.",
    # "It"); presumably the corpus tokens are lower-case with punctuation
    # split off, in which case such tokens never match a trained feature.
    # Confirm and consider normalising the tokens here.
    feature = {word: True for word in review.split()}
    pcls = model.prob_classify(feature)
    sent = pcls.max()  # most probable label
    sents.append(sent)
    probs.append(pcls.prob(sent))

# Bug fix: the loop used to bind the third zip item to `probs` (shadowing the
# list) while printing the stale `prob` left over from the loop above, so
# every line showed the last review's probability.
for review, sent, prob in zip(reviews, sents, probs):
    print(review, "->", sent, "%.2f%%" % round(prob * 100, 2))

性别识别

import random
import numpy as np
import nltk.corpus as nc
import nltk.classify as cf

# Labelled first names from the NLTK names corpus.
male_names = nc.names.words("male.txt")
female_names = nc.names.words("female.txt")

# Train one Naive Bayes classifier per suffix length: the only feature of a
# name is its last n_letter characters (lower-cased), for n_letter = 1..5.
models, acs = [], []
for n_letter in range(1, 6):
    # Males first, then females, exactly as before, so the seeded shuffle
    # below produces the same ordering.
    data = [
        ({"feature": name[-n_letter:].lower()}, "male")
        for name in male_names
    ]
    data.extend(
        ({"feature": name[-n_letter:].lower()}, "female")
        for name in female_names
    )
    # Re-seeding on every pass makes each split reproducible.
    random.seed(7)
    random.shuffle(data)
    # First half trains the model, second half measures its accuracy.
    half = int(len(data) / 2)
    train_data, test_data = data[:half], data[half:]
    model = cf.NaiveBayesClassifier.train(train_data)
    ac = cf.accuracy(model, test_data)
    models.append(model)
    acs.append(ac)
# Pick the classifier with the highest recorded accuracy. Suffix lengths
# start at 1 while list indices start at 0, hence the +1.
best_index = np.array(acs).argmax()
best_letter = best_index + 1
names = ["Leonardo","Amy","Sam","Tom","Katherine","Tayior",
         "Susanne","Watermelon","Alpaca","Paris","Python","JAVA"]
print(names)

# Classify each sample name using the same suffix feature the best model
# was trained on.
best_model = models[best_index]
genders = [
    best_model.classify({"feature": name[-best_letter:].lower()})
    for name in names
]
print(genders)

文本分类

import sklearn.datasets as sd
import sklearn.feature_extraction.text as ft
import sklearn.naive_bayes as nb

# Map each 20-newsgroups category name to a short display label.
# Label spelling corrected: "MOTORCYLES" -> "MOTORCYCLES", "SPAVE" -> "SPACE".
cld = {
    "misc.forsale": "SALES",
    "rec.motorcycles": "MOTORCYCLES",
    "rec.sport.baseball": "BASEBALL",
    "sci.crypt": "CRYPTOGRAPHY",
    "sci.space": "SPACE",
}

# Fetch only the training split of the selected categories. The category
# names are passed as a concrete list rather than a dict view; shuffling
# with a fixed random_state keeps the sample order reproducible.
train = sd.fetch_20newsgroups(
    subset="train",
    categories=list(cld),
    random_state=7,
    shuffle=True,
)
train_data = train.data  # raw post texts

train_y = train.target  # integer category index of each post
cat

最低0.47元/天解锁文章

weixin_43847832

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
自然语言，图像处理

情感分析import nltk.corpus as ncimport nltk.classify as cfimport nltk.classify.util as cupdata = []fileids = nc.movie_reviews.fileids("pos")for fileid in fileids: feature = {} words = nc.movie...
复制链接

扫一扫