# 由于详情页数据无法破解,
# 模拟请求详情页数据时会出现一个滑块验证,即使手动滑动也无法通过,
# 结果
# 只能拿到列表页上面包含的信息。
import requests.sessions
from lxml import etree
from openpyxl import workbook
import time
import json
def get_category_url_list(url, session, headers, sheet):
    """Fetch the top-level category links from ``url`` and crawl each one.

    The page is downloaded through ``session``, parsed as HTML, and the
    ``href`` values of the links under ``ul.parent-menu.clearfix > li > a``
    are collected. Only the entries at positions 1 and 2 of that list are
    kept; each of them is handed to ``get_second_category_url_list``.

    Args:
        url: Address of the page that carries the category menu.
        session: Object exposing ``get(url, headers=...)`` whose result has
            a ``text`` attribute (presumably a ``requests`` session).
        headers: Request headers forwarded to ``session.get``.
        sheet: Passed through unchanged to ``get_second_category_url_list``.

    Returns:
        The category URLs that were selected for crawling. An empty list is
        returned when the request or the parsing fails before any URL could
        be extracted.

    Any exception raised while fetching, parsing or crawling is printed and
    swallowed, so the function never raises on such failures.
    """
    # Bind the result up front so the final ``return`` is always valid, even
    # when the request itself fails inside the ``try`` block below.
    category_url_list = []
    try:
        # Intended to close surplus connections.
        # NOTE(review): requests does not document a ``keep_alive`` attribute
        # on Session, so this assignment may have no effect -- confirm.
        session.keep_alive = False
        res = session.get(url, headers=headers)
        html = etree.HTML(res.text)
        category_url_list = html.xpath(
            '//ul[@class="parent-menu clearfix"]/li/a/@href'
        )
        # Keep only the second and third menu entries.
        category_url_list = category_url_list[1:3]
        for category_url in category_url_list:
            get_second_category_url_list(category_url, session, sheet)
    except Exception as e:
        # Best-effort crawl: report the problem and fall through to return
        # whatever was collected so far.
        print(e)
    return category_url_list
def get_second_category_url_list(category_url, session, sheet):
try:
# 关闭多余连接
session.keep_alive = False
res = session.get(category_url)
html = etree.HTML(res.text)
second_category_url_list = html.xpath('//li[@class="parent-cate"]/div/a/@href')
# print(second_category_url_list)
for i in second_category_url_list