# Site root prepended to the relative hrefs found in listing pages.
_SITE_ROOT = "https://www.chineseinla.com"


def _fetch_listing_page(url):
    """Sleep for a random 0-3 s delay, then load *url* via ``comp``."""
    # Random per-page delay so requests are not fired back-to-back.
    time.sleep(random.random() * 3)
    return comp(url)


def _store_titles(selector, dbs):
    """Insert one ``{'标题': ..., 'url': ...}`` document per title link."""
    title_url = selector.xpath('//div[@class="topic_list_detail"]//a[@class="title"]/@href')
    title_title = selector.xpath('//div[@class="topic_list_detail"]//a[@class="title"]/text()')
    # zip() stops at the shorter list, so a missing text node can no longer
    # raise IndexError and abort the rest of the crawl.
    # NOTE(review): hrefs and texts are queried independently; if an anchor
    # has no (or several) text nodes the pairs may be misaligned - confirm
    # against the real markup.
    for href, title in zip(title_url, title_title):
        dbs.insert_one({'标题': title, 'url': _SITE_ROOT + href})


def _find_next_page(selector):
    """Return the absolute URL of the next listing page, or ``None``."""
    nexturl = selector.xpath('//div[@class="topic_option_right pagination_right"]/a[last()]/@href')
    next_text = selector.xpath('//div[@class="topic_option_right pagination_right"]/a[last()]/text()')
    print("下一页为:{}".format(nexturl))
    print(next_text)
    # The last pagination anchor is only a "next page" link when its label
    # says so; on the final page it points somewhere else (or is absent).
    if nexturl and next_text and next_text[0] == "下一页":
        return _SITE_ROOT + nexturl[0]
    return None


def get_detail(url, dbs):
    """Crawl a paginated topic listing and store every title link in *dbs*.

    Starting at *url*, each page is loaded with ``comp``, every
    ``a.title`` link inside ``div.topic_list_detail`` is stored through
    ``dbs.insert_one`` as ``{'标题': <link text>, 'url': <absolute url>}``,
    and the crawl follows the last pagination link for as long as its text
    is "下一页".

    Pages are walked iteratively rather than recursively, so a long listing
    cannot hit the interpreter's recursion limit.

    Args:
        url: Absolute URL of the first listing page.
        dbs: Object exposing ``insert_one(document)`` (presumably a MongoDB
            collection - verify against the caller).

    Returns:
        None.

    Raises:
        Whatever ``comp`` raises for the *first* page. Any later error
        (parsing, inserting, or fetching a following page) is printed and
        ends the crawl without raising.
    """
    # The first fetch stays outside the try block so that a bad start URL
    # is still reported to the caller.
    selector = _fetch_listing_page(url)
    try:
        while True:
            _store_titles(selector, dbs)
            next_url = _find_next_page(selector)
            if next_url is None:
                return
            selector = _fetch_listing_page(next_url)
    except Exception as e:
        # Best-effort crawl: report the problem and stop.
        print(e)