from bs4 import BeautifulSoup
import requests
import time
# HTTP headers passed to requests.get() for every page fetch.
# NOTE(review): the cookie below embeds a hard-coded session value, presumably
# copied from a browser session -- treat it as a credential and confirm it is
# still valid before relying on it.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/55.0.2883.75 Safari/537.36"
    ),
    # One entry per cookie pair; joined with "; " into a single header value.
    "cookie": "; ".join((
        "_gat=1",
        "gr_user_id=878a8f9c-aaad-4d7d-9341-8bb6d87828bc",
        "_knewone_v2_session=eWJpdE5RTXZkbWxLU3FNalBZZk9lM09kSmE1My8vNUJvRCtCaDBpNXNLWm12dklxZ1NyWHd2Q2FEay9UNEV2WEVUS1V2Y1FQTVRrNlY3SlZPM2FidWhFQmhGOEZFd2U0NGpxT0xXall0bGpvMmVaTm00NEdraGFxTzBIUGdyeDkxbkNXQ1FkVWhaQzRrSzFESDM3Q1gwS3ZoSVd0QlFZRm90WGIybW9qb05jb3RsQ2ZwamlWYm9rU2U2MmVubDluLS1seXN2RXdjVzU1TGo4cFo2Zm1XdCtBPT0%3D--e90fa3ccbe2e3743ebe62ecf131580888682c9c9",
        "_ga=GA1.2.1501286034.1482382737",
        "Hm_lvt_b44696b80ba45a90a23982e53f8347d0=1482382738",
        "Hm_lpvt_b44696b80ba45a90a23982e53f8347d0=1482382798",
        "gr_session_id_e7b7e334c98d4530928513e7439f9ed2=184b0ab0-6d68-4a9d-87e8-8c2bf46fe653",
    )),
}

# Base listing URL; the page number is appended to it as a string.
url = "https://knewone.com/discover?page="
def get_page(url, data=None):
    """Download the page at *url* and print one record per scraped item.

    Each record is a dict with the keys ``img`` (the image ``src``),
    ``title`` (the anchor's ``title`` attribute) and ``link`` (the anchor's
    ``href``).

    Args:
        url: Address of the page to download.
        data: When left as ``None`` (the default) every record is printed.
            Any other value suppresses the output; the page is still
            downloaded and parsed.

    Returns:
        None. Records are written to stdout.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # A timeout keeps a single stalled connection from hanging the crawl.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')

    # Title and link are attributes of the same <a>, so one query serves both.
    anchors = soup.select('section.content > h4 > a')
    imgs = soup.select('a.cover-inner > img')

    if data is not None:
        return

    # zip() stops at the shorter sequence, so unpaired items are dropped.
    for img, anchor in zip(imgs, anchors):
        record = {
            'img': img.get('src'),
            'title': anchor.get('title'),
            'link': anchor.get('href'),
        }
        print(record)
def get_more_page(start, end, delay=0):
    """Scrape a consecutive range of listing pages.

    Calls ``get_page`` once for every page number in ``range(start, end)``,
    appending the number to the module-level ``url``.

    Args:
        start: First page number to fetch.
        end: Page number to stop before (exclusive, as with ``range``).
        delay: Seconds to pause between consecutive requests. The default
            of 0 fetches pages back to back.

    Returns:
        None.
    """
    for page in range(start, end):
        # Pause only between requests, never before the first one.
        if delay and page != start:
            time.sleep(delay)
        get_page(url + str(page))
if __name__ == '__main__':
    # Run the crawl only when executed as a script, so importing this module
    # does not trigger network requests. The end bound is exclusive, so this
    # covers pages 1 through 29.
    get_more_page(1, 30)
# Reposted from: https://www.cnblogs.com/lhfen/p/6211001.html