1. 导入 Python 库
from urllib import request
import ssl
from bs4 import BeautifulSoup
import sys
import io
import json
import time
import random
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from mitmproxy import ctx
2.主要思路
1. 首先用 urllib 的 request 模块为要爬取的页面创建请求对象：
# Build the request object for the page to scrape; `url` must already be
# defined by the surrounding script.
req = request.Request(url)
# Load the saved cookie string. The context manager closes the file even if
# the read fails, and read-only mode is sufficient because nothing is written
# back to cookie.txt.
with open('cookie.txt', 'r', encoding='utf-8') as cookie_file:
    # Strip surrounding whitespace: a trailing newline (commonly added by
    # editors) is rejected by http.client as an invalid header value when the
    # request is sent.
    cookie_str = cookie_file.read().strip()
# Attach the saved cookie so the request is sent with that session's cookies.
req.add_header('cookie', cookie_str)
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.372