反爬
反反爬:
1.请求头伪造
2.多次采集数据时加入随机延时,例如 time.sleep(random.random())
3.IP地址的代理(推荐)
import urllib.request
from urllib import request
# Fetch the Baidu home page with a spoofed browser User-Agent, print the raw
# response bytes, and save them to baidu.html.

# Request headers sent with the request. The User-Agent value is one string;
# it is written as adjacent literals (joined by the parser) so that no raw
# newline ends up inside a string literal.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36"
    ),
}
url = "https://www.baidu.com"

# NOTE: despite its name, ``response`` holds the outgoing Request object; the
# name is kept so any later code that refers to it keeps working.
response = request.Request(url=url, headers=headers)

# Use the response as a context manager so the connection is closed even if
# read() raises.
with request.urlopen(response) as resp:
    data = resp.read()
print(data)

# ``data`` is bytes, so the file is opened in binary mode and written as-is.
with open("baidu.html", "wb") as f:
    f.write(data)
from urllib import request
import random
us = [
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50
(KHTML, like Gecko) Version/5.1 Safari/534.50"
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)"