# Frequent page visits don't look like normal user behavior, so a crawler should pause intermittently between requests.
import requests
from bs4 import BeautifulSoup
import time
import random
# Base URL of the demo blog targeted by this crawler exercise.
link = "http://www.santostang.com/"
def scrap(link):
    """Fetch *link* over HTTP and return the parsed BeautifulSoup document.

    Sends a desktop-Chrome User-Agent header so the request is not rejected
    as an obvious bot.

    Parameters
    ----------
    link : str
        Absolute URL of the page to download.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the "lxml" parser.

    Raises
    ------
    requests.RequestException
        On connection failure or timeout.
    """
    # NOTE: the original body was not indented under the def (IndentationError);
    # restored here.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    }
    # timeout so a hung server cannot block the crawler indefinitely
    r = requests.get(link, headers=headers, timeout=20)
    soup = BeautifulSoup(r.text, "lxml")
    return soup
# Download the front page and extract the article headings.
soup = scrap(link)
# NOTE(review): the original line was truncated at `class_<`; the santostang
# demo blog marks article titles with <h1 class="post-title"> — confirm against
# the live page. The unused `time`/`random` imports suggest a polite
# time.sleep(random.uniform(...)) loop followed in the original tutorial code.
title_list = soup.find_all("h1", class_="post-title")