from textblob import TextBlob
import pandas as pd
#报错Resource averaged_perceptron_tagger not found. Please use the NLTK Downloade的解决方法
import nltk
nltk.download('averaged_perceptron_tagger')
#读入数据
# Read the source data; POS-tag each row's "Summary" text with TextBlob
# and save the tag lists to E:\tag.csv (one tag list per row).
Idea = pd.read_csv(r"E:\1.csv", encoding="utf-8")
i = len(Idea)
print(i)

content = []
for summary in Idea['Summary']:
    blob = TextBlob(summary)
    # blob.tags is a list of (word, POS) tuples; wrap it in a one-element
    # list so the whole tag list lands in a single 'tag' cell per row.
    content.append([blob.tags])

dd = pd.DataFrame(content, columns=['tag'])
dd.to_csv(r"E:\tag.csv")
#得到了以下结果,需要分开[('The', 'DT'), ('3-D', 'JJ'), ('printed', 'JJ'), ('TicTac', 'NNP'), ('gun', 'NN'), ('I', 'PRP'), ('made', 'VBD'), ('as', 'IN'), ('an', 'DT'), ('idea', 'NN'), ('me', 'PRP'), ('and', 'CC'), ('my', 'PRP$'), ('kids', 'NNS'), ('came', 'VBD'), ('up', 'RP'), ('with', 'IN'), ('the', 'DT'), ('reason', 'NN')]
#剩下的统计每个词性可以用excel计算,计算函数如下:=(LEN(B2)-LEN(SUBSTITUTE(B2,"DT","")))/LEN("DT")
'''
将结果分开
'''
import ast
import csv

import openpyxl
from collections import Counter

# Count the POS tags per row of the tag spreadsheet and append one row of
# counts per data row to E:/tagCount1.csv (column order = template dict order).
wb = openpyxl.load_workbook(r"E:/tag.xlsx")  # workbook holding the tag column
ws = wb.active

# Template dict mapping every POS tag to a default value; read and parse it
# ONCE, outside the loop (literal_eval is safe, unlike eval, for this repr).
with open("E:/NLTK.txt", mode='r', encoding='utf-8') as f:
    tag_template = ast.literal_eval(f.read())

# Open the output once in append mode instead of re-opening per row.
with open("E:/tagCount1.csv", mode='a', encoding='utf-8', newline='') as f1:
    csv_writer1 = csv.writer(f1)
    for i in range(2, 27):  # data rows to process (row 1 is the header)
        # Column 2 cell holds the repr of a list of (word, POS) tuples
        # written by the tagging step.
        tags = ast.literal_eval(ws.cell(row=i, column=2).value)
        counts = Counter(pos for _word, pos in tags)
        # Fresh copy of the template so one row's counts never leak into
        # the next (the original re-read the file each iteration for this).
        row_counts = dict(tag_template)
        row_counts.update(counts)
        csv_writer1.writerow(row_counts.values())
        print(i)
#最后表格首行CC CD DT EX FW IN JJ JJR JJS LS MD NN NNS NNP NNPS PDT POS PRP PRP$ RB RBR RBS RP SYM TO UH VB VBD VBG VBN VBP VBZ WDT WP WP$ WRB
# Sample output pasted from a run (date, count) — commented out so these
# stray lines are not evaluated as bare expressions:
# 10-30
# 897
#
# 07-06
# 891
#
# 03-06
# 2433
#
# 11-19
# 3231