# Incomplete code (excerpt).
def page_next(url: str) -> None:
    """Follow the "next page" links starting at *url* and parse each page reached.

    The page at *url* itself is NOT parsed here (the caller is expected to
    have done that already); only the pages reached through the pagination
    links are handed to ``the_parse``.

    The walk is iterative rather than recursive so that listings with many
    pages cannot exhaust the interpreter's recursion limit, and it stops as
    soon as a "next" link leads back to a page that was already visited so a
    self-referencing link on the last page cannot loop forever.

    Args:
        url: Address of the page whose pagination links are followed first.
    """
    visited = {url}
    current = url
    while True:
        # header() is defined elsewhere; presumably it downloads the page
        # and returns its HTML source -- confirm against its definition.
        html = header(current)
        tree = etree.HTML(html)
        # href of the "next page" pager entry; it may be a partial link
        # that still has to be joined with the site domain.
        hrefs = tree.xpath('//div[@class="ewb-page"]//li[contains(@class,"ewb-page-hover")][2]/a/@href')
        if not hrefs:
            # No "next page" link: the last page has been reached.
            return
        # Resolve the link against the module-level ``domain``
        # (requires ``from urllib import parse``).
        next_url = parse.urljoin(domain, hrefs[0])
        if next_url in visited:
            # The link points at a page already handled; stop instead of cycling.
            return
        visited.add(next_url)
        print(next_url)
        the_parse(next_url)
        current = next_url
if __name__ == "__main__":
    # Entry page of the listing to crawl.
    url = "http://www.sxggzyjy.cn/jydt/001001/001001001/001001001005/subPage_jyxx.html"

    # Parse the first page's data before anything else ...
    the_parse(url)
    # ... then follow the pagination links to process the remaining pages.
    page_next(url)