自动分词+热词统计

最新推荐文章于 2022-08-30 17:08:02 发布

waiting&fighting

最新推荐文章于 2022-08-30 17:08:02 发布

阅读量281

点赞数

分类专栏：自然语言处理　文章标签：nlp

本文链接：https://blog.csdn.net/qq_35090060/article/details/117254799

版权

自然语言处理专栏收录该内容

3 篇文章 0 订阅

订阅专栏

代码

# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""

import xlrd
import numpy as np
import pandas as pd
import jieba
import collections
import xlwt

def readexcel(rPath):
    """Collect one column of an Excel workbook as a list of strings.

    Opens the workbook at ``rPath`` with xlrd, selects the sheet at index 1
    and reads the cells at column index 1, starting from row index 1
    (row index 0 is skipped -- presumably a header row; confirm against the
    actual workbook).

    Args:
        rPath: Path of the workbook to read.

    Returns:
        A list with ``str(cell_value)`` for every read cell, in row order.
    """
    # NOTE(review): recent xlrd releases reportedly only read legacy .xls
    # files -- confirm the installed version can open the workbook in use.
    book = xlrd.open_workbook(rPath)
    sheet = book.sheet_by_index(1)
    return [
        str(sheet.cell_value(rowx=row_idx, colx=1))
        for row_idx in range(1, sheet.nrows)
    ]

def countWord(l):
    """Segment texts with jieba and count how often each token occurs.

    Every element of ``l`` is segmented on its own. Concatenating all texts
    before segmentation would let the tokenizer build tokens out of the end
    of one text and the start of the next, which distorts the counts.

    Args:
        l: Iterable of strings (for example one string per spreadsheet
            cell). A single string is accepted and treated as one text.

    Returns:
        A dict mapping each token to its number of occurrences, ordered by
        descending count; tokens with equal counts keep first-seen order.
        The dict is also printed to stdout.
    """
    # A bare string must not be iterated character by character.
    texts = [l] if isinstance(l, str) else l

    tally = collections.Counter()
    for text in texts:
        tally.update(jieba.lcut(text))

    # most_common() sorts by count, highest first, keeping ties stable.
    word_counts = dict(tally.most_common())
    print(word_counts)
    return word_counts

def write2Excel(wPath, word_counts):
    """Write word/count pairs to a one-sheet workbook and save it.

    The sheet is named ``Sheet1``. Row 0 holds the two column titles; each
    following row holds one key of ``word_counts`` in column 0 and its value
    in column 1, in the mapping's iteration order.

    Args:
        wPath: Destination path handed to ``Workbook.save``.
        word_counts: Mapping of word -> count to export.

    Returns:
        None.
    """
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')

    # Header row: one title per column.
    for col_idx, title in enumerate(('词汇', '频率')):
        sheet.write(0, col_idx, title)

    # Data rows start directly below the header; write() takes row, col, value.
    for row_idx, (word, freq) in enumerate(word_counts.items(), start=1):
        sheet.write(row_idx, 0, word)
        sheet.write(row_idx, 1, freq)

    book.save(wPath)
        

# Workbook whose text column is analysed.
rPath = r'C:\Users\K43\Desktop\xppcode\热词统计.xlsx'
# xlwt writes the legacy binary .xls format, so the result file carries a
# matching .xls extension; the same bytes saved under an .xlsx name are
# rejected by Excel as an invalid file format.
wPath = r'C:\Users\K43\Desktop\xppcode\热词统计-结果.xls'

# Run the read -> count -> write pipeline only when executed as a script,
# so importing this module does not touch the file system.
if __name__ == '__main__':
    ylist = readexcel(rPath)
    result = countWord(ylist)
    write2Excel(wPath, result)

waiting&fighting

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
自动分词+热词统计

代码 # -*- coding: utf-8 -*- """Spyder Editor This is a temporary script file.""" import xlrd import numpy as np import pandas as pd import jieba import collections import xlwt def readexcel(rPath): workbook = xlrd.open_workbook(rPath) #pr
复制链接

扫一扫