Python 使用 BeautifulSoup 解析 HTML:
使用选择器根据 class 获取数据
rand_ip = 'rand ip'
proxies = {"https": rand_ip, }
referer = "https://sz.meituan.com/xuexipeixun/" + category_id + "/"
url = "https://sz.meituan.com/xuexipeixun/" + category_id + "/"
headers = {'referer': referer, 'user-agent': user_agent, 'cookie': cookie}
text = requests.get(url, headers=headers, proxies=proxies).text
html = BeautifulSoup(text, "lxml")
div = html.select('nav[class="mt-pagination"]')
if len(div) <= 0:
print("获取不到分页信息:", url)
return 0
a_items = div[0].select("a")
page_count = 1
for a_item in a_items:
if a_item and a_item.get_text() != "":
page = a_item.get_text()
if page_count < int(page):
page_count = int(page)
print(page_count)
其他示例:根据 class 选择器获取文章列表和文章内容
# List items: every <li> under div.arcList is read for its first link's
# title/href and the text of its first <span>.
# NOTE(review): `html` must be an already-parsed BeautifulSoup document; it
# is not created in this excerpt.
articList = html.select("div[class=arcList] ul li")
content_list = []  # NOTE(review): never filled in the visible code
for item in articList:
    link = item.select("a")[0]  # look the anchor up once, reuse it for both attributes
    title = link.get("title")
    href = link.get("href")
    datestr = item.select("span")[0].get_text()

# Single-element lookups by class. These do not use `item`, so they sit
# after the loop; presumably they target an article detail page -- confirm.
# Each `[0]` raises IndexError when the selector matches nothing.
articleTitle = html.select("div[class=articleTitle] h1")[0].get_text()
articleResource = html.select("div[class=articleResource]")[0].get_text()
articleDes = html.select("div[class=articleDes]")[0].get_text()
arcContent = html.select("div[class=arcContent]")[0].get_text()