Python计算机二级综合应用题：两年工作报告词频统计

神笔馬良

于 2024-05-20 11:54:26 发布

阅读量144

点赞数 1

文章标签： python windows 开发语言

本文链接：https://blog.csdn.net/weixin_43501408/article/details/139059621

版权

问题描述：

Python计算机二级综合应用题：两年工作报告词频统计。

问题解答：

本人写的代码有点拙劣，运行效率可能不高，仅作为一种思路。

第一问：

#
# 以下代码仅供参考。
#
import jieba
# Count how often each word of two or more characters occurs in the 2019
# report, then print the ten most frequent as "word:count" pairs on one line.
# NOTE(review): open() is called without an encoding, so the platform default
# is used -- confirm it matches how data2019.txt was saved.
with open("data2019.txt","r") as f1:
    # The with-block closes the file even if reading raises.
    lines = f1.readlines()

d = {}
for line in lines:
    # Strip the trailing newline/whitespace before segmenting with jieba.
    for word in jieba.lcut(line.strip()):
        # Tokens shorter than two characters are not counted.
        if len(word) >= 2:
            d[word] = d.get(word, 0) + 1

# Stable descending sort by count: words with equal counts keep the order in
# which they were first seen.
lt = sorted(d.items(), key=lambda x: x[1], reverse=True)

# Slicing instead of indexing lt[9] avoids an IndexError when fewer than ten
# distinct words exist; with ten or more the printed line is unchanged.
print("2019:" + ",".join(word + ":" + str(count) for word, count in lt[:10]))



# Count how often each word of two or more characters occurs in the 2018
# report, then print the ten most frequent as "word:count" pairs on one line.
# NOTE(review): open() is called without an encoding, so the platform default
# is used -- confirm it matches how data2018.txt was saved.
with open("data2018.txt","r") as f2:
    # The with-block closes the file even if reading raises.
    lines = f2.readlines()

d = {}
for line in lines:
    # Strip the trailing newline/whitespace before segmenting with jieba.
    for word in jieba.lcut(line.strip()):
        # Tokens shorter than two characters are not counted.
        if len(word) >= 2:
            d[word] = d.get(word, 0) + 1

# Stable descending sort by count: words with equal counts keep the order in
# which they were first seen.
lt = sorted(d.items(), key=lambda x: x[1], reverse=True)

# Slicing instead of indexing lt[9] avoids an IndexError when fewer than ten
# distinct words exist; with ten or more the printed line is unchanged.
print("2018:" + ",".join(word + ":" + str(count) for word, count in lt[:10]))

第二问：

#
# 以下代码仅供参考。
#
import jieba
# Build set1: the ten most frequent words (two or more characters long) in
# the 2019 report.
# NOTE(review): open() is called without an encoding, so the platform default
# is used -- confirm it matches how data2019.txt was saved.
with open("data2019.txt","r") as f1:
    # The with-block closes the file even if reading raises.
    lines = f1.readlines()

d = {}
for line in lines:
    # Strip the trailing newline/whitespace before segmenting with jieba.
    for word in jieba.lcut(line.strip()):
        # Tokens shorter than two characters are not counted.
        if len(word) >= 2:
            d[word] = d.get(word, 0) + 1

# Stable descending sort by count: words with equal counts keep the order in
# which they were first seen.
lt1 = sorted(d.items(), key=lambda x: x[1], reverse=True)

# Keep only the words (not the counts) of the ten highest-ranked entries.
set1 = {word for word, _ in lt1[:10]}

# Build set2: the ten most frequent words (two or more characters long) in
# the 2018 report.
# NOTE(review): open() is called without an encoding, so the platform default
# is used -- confirm it matches how data2018.txt was saved.
with open("data2018.txt","r") as f2:
    # The with-block closes the file even if reading raises.
    lines = f2.readlines()

d = {}
for line in lines:
    # Strip the trailing newline/whitespace before segmenting with jieba.
    for word in jieba.lcut(line.strip()):
        # Tokens shorter than two characters are not counted.
        if len(word) >= 2:
            d[word] = d.get(word, 0) + 1

# Stable descending sort by count: words with equal counts keep the order in
# which they were first seen.
lt2 = sorted(d.items(), key=lambda x: x[1], reverse=True)

# Keep only the words (not the counts) of the ten highest-ranked entries.
set2 = {word for word, _ in lt2[:10]}

# Compare the two top-ten word sets: words in both years, and words that
# appear in only one year's top ten.
set_jiao = set1 & set2
set_cha2019 = set1 - set2
set_cha2018 = set2 - set1

# Sets of strings iterate in hash order, which can differ from run to run
# under string hash randomization; sorting makes the printed order
# reproducible while listing exactly the same words.
print("共有词语:" + ",".join(sorted(set_jiao)))
print("2019特有:" + ",".join(sorted(set_cha2019)))
print("2018特有:" + ",".join(sorted(set_cha2018)))