问题描述:
Python 计算机二级综合应用题:两年工作报告词频统计。
问题解答:
本人写的代码有点拙劣,运行效率可能不高,仅作为一种思路。
第一问:
#
# 以下代码仅供参考。
#
import jieba
from collections import Counter

# Number of most frequent words reported for each year.
TOP_N = 10


def count_words(lines, cut=None):
    """Count how often each word of two or more characters occurs.

    Args:
        lines: Iterable of text lines (an open text file works).
        cut: Callable that splits one line into a list of words.
            Defaults to ``jieba.lcut``.

    Returns:
        A ``Counter`` mapping each word to its number of occurrences.
        Words shorter than two characters are ignored.
    """
    if cut is None:
        cut = jieba.lcut
    counts = Counter()
    for line in lines:
        counts.update(word for word in cut(line.strip()) if len(word) >= 2)
    return counts


def format_top_words(counts, n=TOP_N):
    """Format the ``n`` most frequent words as ``word:count,word:count``.

    Fewer than ``n`` entries are emitted when ``counts`` holds fewer
    distinct words, instead of failing with an ``IndexError``.
    Words with equal counts keep the order in which they were first seen.
    """
    return ",".join(
        "{}:{}".format(word, count) for word, count in counts.most_common(n)
    )


def report_year(label, path, encoding=None):
    """Print one ``label:word:count,...`` line for the text file at ``path``.

    ``encoding=None`` keeps the platform default encoding; pass an explicit
    value (for example ``"utf-8"``) if the data file requires it.
    """
    # The context manager closes the file even if segmentation fails.
    with open(path, "r", encoding=encoding) as report:
        counts = count_words(report)
    print(label + ":" + format_top_words(counts))


if __name__ == "__main__":
    report_year("2019", "data2019.txt")
    report_year("2018", "data2018.txt")
第二问:
#
# 以下代码仅供参考。
#
import jieba
from collections import Counter

# Number of most frequent words compared between the two years.
TOP_N = 10


def count_words(lines, cut=None):
    """Count how often each word of two or more characters occurs.

    Args:
        lines: Iterable of text lines (an open text file works).
        cut: Callable that splits one line into a list of words.
            Defaults to ``jieba.lcut``.

    Returns:
        A ``Counter`` mapping each word to its number of occurrences.
        Words shorter than two characters are ignored.
    """
    if cut is None:
        cut = jieba.lcut
    counts = Counter()
    for line in lines:
        counts.update(word for word in cut(line.strip()) if len(word) >= 2)
    return counts


def top_words(counts, n=TOP_N):
    """Return up to ``n`` most frequent words, most frequent first."""
    return [word for word, _ in counts.most_common(n)]


def compare_top_words(first, second):
    """Split two ranked word lists into shared and list-specific words.

    Args:
        first: Ranked words of the first list.
        second: Ranked words of the second list.

    Returns:
        A tuple ``(common, only_first, only_second)`` of lists. ``common``
        and ``only_first`` follow the order of ``first``; ``only_second``
        follows the order of ``second``. Keeping rank order makes the
        printed result identical from run to run, which joining a plain
        ``set`` does not guarantee.
    """
    in_first = set(first)
    in_second = set(second)
    common = [word for word in first if word in in_second]
    only_first = [word for word in first if word not in in_second]
    only_second = [word for word in second if word not in in_first]
    return common, only_first, only_second


def load_top_words(path, encoding=None):
    """Read the text file at ``path`` and return its most frequent words.

    ``encoding=None`` keeps the platform default encoding; pass an explicit
    value (for example ``"utf-8"``) if the data file requires it.
    """
    # The context manager closes the file even if segmentation fails.
    with open(path, "r", encoding=encoding) as report:
        return top_words(count_words(report))


if __name__ == "__main__":
    common, only_2019, only_2018 = compare_top_words(
        load_top_words("data2019.txt"),
        load_top_words("data2018.txt"),
    )
    print("共有词语:" + ",".join(common))
    print("2019特有:" + ",".join(only_2019))
    print("2018特有:" + ",".join(only_2018))