python selenium在Ubuntu服务器上访问页面报403错误
经过排查,问题出在下面这组 Chrome 启动参数上:
# Chrome launch switches that were active when the 403 was observed,
# registered in the same order as before.
for launch_switch in (
    "start-maximized",
    "enable-automation",
    "--disable-infobars",
    "--disable-dev-shm-usage",
    "--no-sandbox",
    "--disable-browser-side-navigation",
    "--headless",
    "window-size=1920x1080",
    "--disable-gpu",
):
    self.chrome_opts.add_argument(launch_switch)
在 Windows 中,注释掉 --headless 后可以正常访问;不注释则报 403 错误。
但是在 Ubuntu 服务器上不能注释掉 --headless,所以我们在代码中添加自定义的 User-Agent:
# Send a regular desktop Chrome user agent; per the surrounding text this is
# what stops the site from answering 403 when --headless is enabled.
self.uer_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
self.chrome_opts.add_argument('user-agent=%s' % self.uer_agent)
即可。
# Configure Chrome, start the driver, allow file downloads, and open the site.
self.chrome_opts = webdriver.ChromeOptions()
self.chrome_opts.add_argument("start-maximized")
self.chrome_opts.add_argument("enable-automation")
self.chrome_opts.add_argument("--disable-infobars")
self.chrome_opts.add_argument("--disable-dev-shm-usage")
self.chrome_opts.add_argument("--no-sandbox")
self.chrome_opts.add_argument("--disable-browser-side-navigation")
self.chrome_opts.add_argument("--headless")
# Chrome documents this switch as "width,height"; the comma form is used so
# the size is also honoured when --headless is removed.
self.chrome_opts.add_argument("window-size=1920,1080")
self.chrome_opts.add_argument("--disable-gpu")

# Override the user agent: this prevents the 403 response seen when the page
# is opened from the Ubuntu server.
# NOTE(review): "uer_agent" looks like a typo for "user_agent"; the attribute
# name is kept because code outside this snippet may read it.
self.uer_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
self.chrome_opts.add_argument('user-agent=%s' % self.uer_agent)

# Downloads go to "<current working directory>/exe" without a save prompt.
# The path is built once with os.path.join so the separator matches the host
# OS and the preference and the DevTools command always agree.
self.path = os.getcwd()
download_dir = os.path.join(self.path, "exe")
prefs = {
    "download.prompt_for_download": False,
    "download.default_directory": download_dir,
}
self.chrome_opts.add_experimental_option("prefs", prefs)

# "options" replaces the deprecated "chrome_options" keyword.
self.driver = webdriver.Chrome("./chromedriver.exe", options=self.chrome_opts)

# Register chromedriver's send_command endpoint and use it to issue the
# DevTools command Page.setDownloadBehavior, allowing downloads into
# download_dir.
self.driver.command_executor._commands["send_command"] = (
    "POST",
    "/session/$sessionId/chromium/send_command",
)
params = {
    "cmd": "Page.setDownloadBehavior",
    "params": {"behavior": "allow", "downloadPath": download_dir},
}
self.driver.execute("send_command", params)

self.driver.implicitly_wait(30)  # seconds
self.driver.maximize_window()
self.base_url = "http://www.huobiao.cn/"
self.driver.get(self.base_url)