# 爬取安徽省博物馆 (crawl the Anhui Museum website):
import requests
from bs4 import BeautifulSoup
# Address of the Anhui Museum page that get_info() downloads and scans for
# question / answer entries.
# NOTE(review): '#page=' starts a URL fragment. Fragments are resolved on the
# client side and are not sent in the HTTP request, so a page number appended
# to this string presumably does not change which page the server returns --
# confirm against the code that builds the per-page URLs.
url = (
    'http://www.ahm.cn/Service/Leaveword/zxzx'
    '#page='
)
def get_info(url, data=None):
wd_data = requests.get(url)
soup = BeautifulSoup(wd_data.text, 'lxml')
questions = soup.select('#articles > ul > li > div.question.item')
times = soup.select('#articles > ul > li > p > span:nth-child(2)')
replys = soup.select('#articles > ul > li > div.answer.item')
primary_class = '安徽省博物馆'
print(questions)
for question, time, reply in zip(questions, times, replys):
data = {
'question': question.get_text(),
'time': time.get_text(),
'reply': reply.get_text(),
'primary': primary_class
}
#print(data)
with open('安徽博物馆.txt', 'a', encoding='ut