问题描述:
Python 计算机二级综合应用题:统计《红楼梦》中的人物词频。
问题解答:本人这道题的代码写得有点拙劣,仅作为另一种思路供参考。
# 以下代码为提示框架
# 请在...处使用一行或多行代码替换
# 请在______处使用一行代码替换
#
# 注意:提示框架代码可以任意修改,以完成程序功能为准
import csv
from collections import Counter

import jieba

# Count how often each word (with character aliases merged) appears in the
# novel, keep multi-character words seen at least MIN_COUNT times, print them
# and save them to a CSV file as "word,count" rows.
NOVEL_PATH = "红楼梦.txt"
STOPWORDS_PATH = "停用词.txt"
RESULT_PATH = "result.csv"
MIN_COUNT = 40

# Canonical character name -> alternative tokens that jieba may produce for
# the same character. Every alternative is folded into the canonical name
# before counting.
alias_groups = {
    "凤姐": ["凤姐儿", "凤丫头"],
    "宝玉": ["二爷", "宝二爷"],
    "黛玉": ["颦儿", "林妹妹", "黛玉道"],
    "宝钗": ["宝丫头"],
    "贾母": ["老祖宗"],
    "袭人": ["袭人道"],
    "贾政": ["贾政道"],
    "贾琏": ["琏二爷"],
}
# Flattened alias -> canonical lookup, so each token needs one dict access
# instead of walking a chain of list membership tests.
canonical_name = {
    alias: name
    for name, aliases in alias_groups.items()
    for alias in aliases
}

# A set gives O(1) membership checks; the stop-word test runs once per token.
with open(STOPWORDS_PATH, "r", encoding="utf-8") as sf:
    stop_words = {line.strip("\n") for line in sf}

# Segment the novel line by line; `with` guarantees the file is closed.
words = []
with open(NOVEL_PATH, "r", encoding="utf-8") as f:
    for line in f:
        words.extend(jieba.lcut(line.strip()))

# Aliases are merged first, then stop words are dropped, so a stop-word list
# entry is compared against the canonical form of each token.
counts = Counter()
for word in words:
    word = canonical_name.get(word, word)
    if word not in stop_words:
        counts[word] += 1

# Highest count first; sorted() is stable, so ties keep first-seen order.
ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
ls3 = [
    (word, count)
    for word, count in ranked
    if count >= MIN_COUNT and len(word) >= 2
]
print(ls3)

# newline="" lets the csv module control line endings itself.
with open(RESULT_PATH, "w", newline="", encoding="utf-8") as csvfile:
    csv.writer(csvfile).writerows(ls3)
个人觉得这道题的难点在于如何把结果写入 .csv 文件:本人对 .csv 文件的写入操作不如对 .txt 文件那样熟悉,所以 .csv 文件的写入操作还需要多加练习。
下面是系统给出的准确答案。
# 以下代码为提示框架
# 请在...处使用一行或多行代码替换
# 请在______处使用一行代码替换
#
# 注意:提示框架代码可以任意修改,以完成程序功能为准
import jieba

# Segment the novel, drop stop words and single-character tokens, merge
# character aliases, then print the full frequency list and save every entry
# with a count of at least 40 to result.csv as "word,count" lines.
f = "红楼梦.txt"
sf = "停用词.txt"

# Read inside `with` so the handle is closed; the path variables above are
# never rebound to file objects.
with open(f, 'r', encoding='utf-8') as novel_file:
    txt = jieba.lcut(novel_file.read())

# A set makes the per-token stop-word test O(1) instead of a list scan.
with open(sf, 'r', encoding='utf-8') as stop_file:
    stop_words = set(stop_file.read().splitlines())

# Alternative token -> canonical character name.
aliases = {
    '凤姐儿': '凤姐',
    '凤丫头': '凤姐',
    '二爷': '宝玉',
    '宝二爷': '宝玉',
    '颦儿': '黛玉',
    '林妹妹': '黛玉',
    '黛玉道': '黛玉',
    '宝丫头': '宝钗',
    '老祖宗': '贾母',
    '袭人道': '袭人',
    '贾政道': '贾政',
    '琏二爷': '贾琏',
}

# Count word frequencies. Stop words are filtered on the raw token, before
# alias merging, and single-character tokens are ignored.
counts = {}
for word in txt:
    if word in stop_words or len(word) == 1:
        continue
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1

li = sorted(counts.items(), key=lambda x: x[1], reverse=True)
print(li)

# Write entries whose count is 40 or more. The list is sorted in descending
# order, so the loop can stop at the first count below 40.
# Opened with 'w' rather than 'a': the script regenerates the whole report,
# and append mode would duplicate every row each time it is re-run.
with open(r'result.csv', 'w', encoding='gbk') as out:
    for key, value in li:
        if value < 40:
            break
        row = key + ',' + str(value)
        out.write(row + '\n')
        print(row)