# Web crawler word cloud (爬虫词云)
"""
Created on Thu Dec 2 20:00:52 2021
@author: lenovo
"""
import jieba
import wordcloud
from imageio import imread
import requests
from bs4 import BeautifulSoup
# Directory prefix that get_txt prepends to each article title when it
# builds the name of the .txt file it writes.
path = ".//article//"
def get_txt(url):
“”“爬取文本内容并命名写入文件夹”""
try:
headers={
‘User-Agent’:‘Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36’
}
response = requests.get(url=url,headers=headers)
response.encoding = response.apparent_encoding
page_text = response.text
soup = BeautifulSoup(page_text,‘lxml’)
a_lists = soup.select(‘ul[class=conlun5] a’)
for a in a_lists:
FilePath = path+a.text +’.txt’
f = open(FilePath,‘w+’,encoding=‘utf-8’)
#tags = soup.find(‘div’,attrs={“id”:“conlun2_box_text”