import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# URL pattern behind the site's asynchronous page loading; a page number is
# appended to this prefix to address one page of results.
url = "https://knewone.com/discover?page="
def get_page(url, data=None):
    """Fetch one listing page and print a dict for every item found on it.

    The page at *url* is downloaded and parsed with BeautifulSoup's ``lxml``
    parser. Titles, images and item links are selected independently and
    paired up positionally; for each triple a dict with the keys ``'title'``,
    ``'img'`` and ``'links'`` is printed.

    Args:
        url: Address of the page to fetch.
        data: When anything other than ``None`` is passed, the page is still
            downloaded and parsed, but nothing is extracted or printed.

    Returns:
        None. Results are only printed.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps a stalled connection from blocking the crawl forever.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    titles = soup.select('section > h4 > a')
    images = soup.select('header > a > img')
    anchors = soup.select('header > a')

    if data is not None:
        return

    # NOTE(review): the three lists are matched by position only; this
    # presumably relies on every item having exactly one title, image and
    # header link -- confirm against the page markup.
    for title, image, anchor in zip(titles, images, anchors):
        href = anchor.get('href')
        item = {
            'title': title.text,
            'img': image.get('src'),
            # urljoin handles relative and absolute hrefs alike; an anchor
            # without href yields None instead of raising TypeError.
            'links': urljoin('https://knewone.com', href) if href else None,
        }
        print(item)
def get_more_page(start, end, delay=0):
    """Fetch and print a consecutive range of listing pages.

    Calls ``get_page`` once for each page number from *start* up to, but not
    including, *end*, building each address by appending the page number to
    the module-level ``url`` prefix.

    Args:
        start: First page number to fetch.
        end: Page number to stop before (exclusive, as with ``range``).
        delay: Seconds to pause between consecutive requests. The default
            ``0`` sends the requests back to back.
    """
    for page in range(start, end):
        # Throttle only between requests, never before the first one.
        if delay > 0 and page != start:
            time.sleep(delay)
        get_page(url + str(page))
# Script entry: crawl pages 1 through 9 (the end bound is exclusive).
get_more_page(start=1, end=10)