from bs4 import BeautifulSoup
import requests
def paginate(url_template, page_num, timeout=10):
    """Fetch one results page and hand every link found on it to the downloader.

    Args:
        url_template: URL string with a ``{}`` placeholder for the page number.
        page_num: Value substituted into ``url_template``.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = url_template.format(page_num)
    # Without a timeout a stalled server would block this call indefinitely.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were results.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Process the current page: only anchors inside <div> elements carrying
    # the "relative" class are followed.
    for div in soup.find_all('div', class_='relative'):
        for link in div.find_all('a'):
            href = link.get('href')
            if not href:
                # An anchor without a target gives the downloader nothing to fetch.
                continue
            print(href)
            # Downloads run sequentially, one link at a time.
            download_images(href)
# Search-results URL template; the `{}` placeholder receives the page number.
# NOTE(review): the original note also mentions a starting page number, but
# none is defined in this snippet.
url_template = 'https://xxx.net/search?kw=xxx&page={}'
# Snippet: get the number of pages (count the pagination buttons)
# Get the number of pages: find the pagination container and count the
# anchors inside it.
# NOTE(review): every <a> in the container is counted; presumably each one is
# a page button -- confirm there are no extra prev/next links.
span = soup.find(
    'span',
    attrs={'class': 'relative z-0 inline-flex flex-wrap shadow-sm rounded-md'},
)
if span is not None:
    a_links = span.find_all('a')
    num = len(a_links)
    print(f"当前链接共有{num}页")
# Snippet: find the links of the target images
# Collect the URL and title of every <img> whose class attribute is
# 'block my-1'. The document is searched once and the result reused, so both
# lists come from the same elements in the same order.
# NOTE(review): a multi-class string passed as class_ appears to be matched
# against the exact attribute value -- confirm the site always emits the
# classes in this order.
target_imgs = soup.find_all('img', class_='block my-1')
# Strict indexing is kept on purpose: an image missing either attribute
# raises KeyError rather than leaving the two lists misaligned.
image_links = [img['src'] for img in target_imgs]
image_names = [img['title'] for img in target_imgs]