day3.21-常见的反爬
01.代理
import requests
def get_html(url):
    """Fetch *url* through an HTTPS proxy with a browser User-Agent and return the body.

    The custom ``user-agent`` header makes the request look like a normal
    browser; the ``proxies`` mapping routes HTTPS traffic through the given
    proxy server to evade IP-based blocking.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'
    }
    # Bug fix: the original code issued a second requests.get(url) right after
    # this one, without headers or proxy, overwriting `response` and defeating
    # both the proxy and the User-Agent disguise. Only the proxied request stays.
    response = requests.get(url, headers=headers, proxies={'https': '114.106.157.166:4545'})
    print(response.text)
    return response.text
if __name__ == '__main__':
    # Demo run: scrape the Douban Top-250 movie page through the proxy.
    target = 'https://movie.douban.com/top250'
    get_html(target)
02.selenium使用代理
from selenium.webdriver import Chrome, ChromeOptions

# Build Chrome launch options: hide the "controlled by automation" banner,
# disable image loading (value 2 = block) to speed up page loads, and route
# all traffic through an HTTP proxy server.
chrome_options = ChromeOptions()
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
chrome_options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
chrome_options.add_argument('--proxy-server=http://115.207.203.163:4531')

browser = Chrome(options=chrome_options)
browser.get('https://movie.douban.com/top250')
browser.close()
03.requests自动登录获取cookie
from selenium.webdriver import Chrome

# Open the site, let the user log in by hand, then persist the session
# cookies to disk for later reuse.
b = Chrome()
b.get('https://www.taobao.com/')
# Block until the user confirms they have finished logging in manually.
input('是否完成:')
cookies = b.get_cookies()
# Bug fix: the original `open(...).write(...)` never closed the file handle.
# A context manager guarantees the data is flushed and the file is closed.
with open('files/taobao.txt', 'w', encoding='utf-8') as f:
    f.write(str(cookies))
04.requests自动登录使用
import ast

from selenium.webdriver import Chrome

# Restore a previously saved login session by injecting stored cookies.
b = Chrome()
b.get('https://www.taobao.com/')
# Security fix: ast.literal_eval safely parses the repr()-style cookie list
# written by the save script; eval() would execute arbitrary code if the
# file were ever tampered with. The context manager also closes the file,
# which the original bare open(...).read() did not.
with open('files/taobao.txt', encoding='utf-8') as f:
    cookie_list = ast.literal_eval(f.read())
for cookie in cookie_list:
    b.add_cookie(cookie)
# Reload the page so the injected cookies take effect.
b.get('https://www.taobao.com/')
05.字体反爬