Python词频统计与杨辉三角

词频统计及个性化输出

1. 所需库的安装

由于从默认的 PyPI 源安装速度太慢,这里改用豆瓣的镜像源进行安装

pip3 install jieba -i https://pypi.douban.com/simple

pip3 install wordcloud -i https://pypi.douban.com/simple

pip3 install imageio -i https://pypi.douban.com/simple

2. jieba库小Demo

import jieba
# lcut() segments the sentence and returns the tokens as a list (result shown on the next line).
jieba.lcut("中国是一个伟大的国家")
['中国', '是', '一个', '伟大', '的', '国家']

3. CalHamlet 词频统计

def getText():
    """Return the text of ``hamlet.txt`` lower-cased with punctuation blanked.

    Every character in the punctuation list is replaced by a single space so
    that a later ``str.split()`` yields bare words.

    Returns:
        str: The normalized file contents.

    Raises:
        OSError: If ``hamlet.txt`` cannot be opened.
    """
    punctuation = '!"#$%^&*()+_-=,./:;<>?[\\]{|}`~'
    # The context manager closes the file handle even if read() fails.
    with open("hamlet.txt", "r") as f:
        txt = f.read().lower()
    # One translate() pass instead of one replace() scan per character.
    return txt.translate(str.maketrans(punctuation, " " * len(punctuation)))

# Count how often each word occurs in the normalized text.
hamletTxt = getText()
words = hamletTxt.split()
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
# Most frequent first; the sort is stable, so ties keep first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing prints at most 10 rows and cannot raise IndexError on short texts.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))
the        1138
and         965
to          754
of          669
you         550
i           542
a           542
my          514
hamlet      462
in          436

4. CalHamlet 词频统计(过滤常用词)

# Words to leave out of the Hamlet ranking, written as one whitespace-separated
# string and split into a set.
excludes = set(
    "the and of you a i my in to that is "
    "it not his this but with for your me be "
    "as he what him so have will do no we are "
    "all on our by or shall if o good come thou "
    "they now more let from her how at thy".split()
)
def getText():
    """Return the text of ``./资源/hamlet.txt`` lower-cased with punctuation blanked.

    Every character in the punctuation list is replaced by a single space so
    that a later ``str.split()`` yields bare words.

    Returns:
        str: The normalized file contents.

    Raises:
        OSError: If the file cannot be opened.
    """
    punctuation = '!"#$%^&*()+_-=,./:;<>?[\\]{|}`~'
    # The context manager closes the file handle even if read() fails.
    with open("./资源/hamlet.txt", "r") as f:
        txt = f.read().lower()
    # One translate() pass instead of one replace() scan per character.
    return txt.translate(str.maketrans(punctuation, " " * len(punctuation)))

# Count how often each word occurs in the normalized text.
hamletTxt = getText()
words = hamletTxt.split()
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
# pop() with a default: an excluded word that never occurs in the text is
# simply skipped instead of raising KeyError.
for word in excludes:
    counts.pop(word, None)
# Most frequent first; the sort is stable, so ties keep first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing prints at most 10 rows and cannot raise IndexError on short texts.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))
hamlet      462
lord        309
king        194
horatio     157
claudius    120
queen       117
polonius    116
laertes     103
gertrude     95
ophelia      86

5.三国演义 人物出场统计

import jieba
# Read the whole novel; the context manager closes the file after reading.
with open("./资源/三国演义.txt", 'r', encoding='utf-8') as f:
    txt = f.read()
words = jieba.lcut(txt)
counts = {}
for word in words:
    # Single-character tokens are not counted.
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1
# Most frequent first; the sort is stable, so ties keep first-seen order.
items = sorted(counts.items(), key=lambda y: y[1], reverse=True)
# Slicing prints at most 15 rows and cannot raise IndexError on short texts.
for word, count in items[:15]:
    print("{0:<10}{1:>5}".format(word, count))
曹操          934
孔明          831
将军          759
却说          647
玄德          570
关公          509
丞相          488
二人          463
不可          435
荆州          420
孔明曰         384
玄德曰         383
不能          383
如此          376
张飞          348

6.三国演义 人物出场统计 过滤版

import jieba
# Words removed from the counts before ranking.
excludes = {"将军", "却说", "荆州", "二人", "不可", "不能", "如此", "商议"}
# Alternative spellings/titles folded into one canonical name per character.
aliases = {
    "诸葛亮": "孔明",
    "孔明曰": "孔明",
    "关公": "关羽",
    "云长": "关羽",
    "玄德": "刘备",
    "玄德曰": "刘备",
    "孟德": "曹操",
    "丞相": "曹操",
}
# Read the whole novel; the context manager closes the file after reading.
with open("./资源/三国演义.txt", "r", encoding='utf-8') as f:
    txt = f.read()
words = jieba.lcut(txt)
counts = {}
for word in words:
    # Single-character tokens are not counted.
    if len(word) == 1:
        continue
    # Always assign rword: aliases map to their canonical name, every other
    # word maps to itself.
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1
# pop() with a default: an excluded word that never occurs in the text is
# simply skipped instead of raising KeyError.
for word in excludes:
    counts.pop(word, None)
# Most frequent first; the sort is stable, so ties keep first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing prints at most 5 rows and cannot raise IndexError on short texts.
for word, count in items[:5]:
    print("{0:<10}{1:>5}".format(word, count))
曹操         1435
刘备         1228
孔明          839
关羽          779
张飞          348

7.杨辉三角

def NumList_to_StrList(data):
    """Join the items of *data* into one space-separated string.

    Args:
        data: Iterable of values (for example the ints of one triangle row);
            each item is converted with ``str()``.

    Returns:
        str: The converted items separated by single spaces, or ``""`` when
        *data* is empty.
    """
    return ' '.join(map(str, data))


def YangHui(n):
    """Print the first *n* rows of Yang Hui's (Pascal's) triangle.

    Each row is centered in a field ``n * 6`` characters wide. Exactly *n*
    rows are printed, so ``n == 1`` prints a single row and ``n <= 0`` prints
    nothing.

    Args:
        n (int): Number of rows to print.
    """
    width = n * 6
    line = [1]
    for _ in range(n):
        print(NumList_to_StrList(line).center(width))
        # Each inner entry of the next row is the sum of the two entries
        # above it; the row is then bracketed by 1s.
        line = [1] + [a + b for a, b in zip(line, line[1:])] + [1]


YangHui(6)

                 1                  
                1 1                 
               1 2 1                
              1 3 3 1               
             1 4 6 4 1              
           1 5 10 10 5 1            

8.简单的词云小程序

import jieba
import wordcloud
import imageio
# Read the whole novel; the context manager closes the file after reading.
with open("./资源/三国演义.txt", "r", encoding='utf-8') as fh:
    f = fh.read()
ls = jieba.lcut(f)
# generate() is given a single string, so the tokens are joined with spaces.
txt = " ".join(ls)
# Image passed to WordCloud as its mask.
m = imageio.imread("./资源/duye.jpg")
# Font file handed to WordCloud via font_path (a Windows-specific location).
font = r'c:/Windows/Fonts/simfang.ttf'
w = wordcloud.WordCloud(
    background_color="white",
    font_path=font,
    width=1000,
    height=1000,
    mask=m,
).generate(txt)
w.to_file("./资源/test.png")

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值