python期末大作业_一个完整的python大作业

importrequestsimportrefrom bs4 importBeautifulSoupfrom datetime importdatetimeimportpandasimportsqlite3importjiebafrom wordcloud importWordCloudimportmatplotlib.pyplot as plt

url= "http://www.xinhuanet.com/"f=open("css.txt","w+")

res0=requests.get(url)

res0.encoding="utf-8"soup= BeautifulSoup(res0.text,"html.parser")

newsgroup=[]for news in soup.select("li"):if len(news.select("a"))>0:print(news.select("a")[0].text)

title=news.select("a")[0].text

f.write(title)

f.close()

f0= open('css.txt','r')

qz=[]

qz=f0.read()

f0.close()print(qz)

words=list(jieba.cut(qz))

ul={':','的','"','、','”','“','。','!',':','?',' ','\u3000',',','\n'}

dic={}

keys= set(words)-ulfor i inkeys:

dic[i]=words.count(i)

c=list(dic.items())

c.sort(key=lambda x:x[1],reverse=True)

f1= open('diectory.txt','w')for i in range(10):print(c[i])for words_count in range(c[i][1]):

f1.write(c[i][0]+' ')

f1.close()

df=pandas.DataFrame(words)print(df.head())

with sqlite3.connect('newsdb3.sqlite') as db:

df.to_sql('newsdb3',con =db)

f3= open('diectory.txt','r')

cy_file=f3.read()

f3.close()

cy=WordCloud().generate(cy_file)

plt.imshow(cy)

plt.axis("off")

plt.show()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值