#!/usr/bin/python3
# -*- coding:utf-8 -*-
"""
@author: JHC000abc@gmail.com
@file: demo.py
@time: 2024/1/30 10:46
@desc:
"""
import json
import os
import time
import datetime
import random
import traceback
from DrissionPage import WebPage, ChromiumOptions, SessionOptions
class DrissionPageDemo(object):
    """Repeatedly open randomly chosen URLs in a DrissionPage browser.

    ``process`` reads its inputs from the working directory (``url.list``,
    ``ua.list``, ``cookie.list``, ``settings.json``). Each round creates a
    new ``WebPage``, loads one URL, optionally saves the page HTML under
    ``result/``, closes the page and then counts down before the next round.
    """

    def __init__(self):
        """Create the demo; no page exists until ``get_page`` is called."""
        # Defined up front so the attribute exists even before get_page().
        self.page = None

    def get_page(self, ua=None, incognito=False, time_out=60, headless=False, cookies=None):
        """Build a new ``WebPage`` and store it as ``self.page``.

        :param ua: user-agent string; only applied when truthy
        :param incognito: value passed to ``ChromiumOptions.incognito``
        :param time_out: value passed to ``set_timeouts(page_load=...)``
        :param headless: value passed to ``ChromiumOptions.headless``
        :param cookies: value passed to ``SessionOptions.set_cookies``;
            only applied when truthy
        :return: the newly created ``WebPage``
        """
        self.page = None
        co = ChromiumOptions()
        so = SessionOptions()
        if cookies:
            so.set_cookies(cookies)
        if ua:
            co.set_user_agent(user_agent=ua)
        co.incognito(incognito)
        co.set_argument('--window-size', '800,600')
        co.auto_port(True)
        co.headless(headless)
        co.ignore_certificate_errors(True)
        co.mute(True)
        co.set_timeouts(page_load=time_out)
        self.page = WebPage(driver_or_options=co, session_or_options=so)
        return self.page

    def save_result(self, html):
        """Write *html* to a timestamped file inside the ``result`` directory.

        :param html: text to store
        :return: None
        """
        os.makedirs("result", exist_ok=True)
        file = "result/result_{}.html".format(
            datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S"))
        with open(file, "w", encoding="utf-8") as fp:
            fp.write(html)

    def read_file(self, file):
        """Read a UTF-8 text file into a list of stripped, non-empty lines.

        Blank and whitespace-only lines are dropped so that a stray empty
        line can never be selected as a URL, user agent or cookie.

        :param file: path of the file to read
        :return: list of stripped lines in file order
        """
        with open(file, "r", encoding="utf-8") as fp:
            stripped = (line.strip() for line in fp)
            return [line for line in stripped if line]

    def read_json_file(self, file):
        """Parse a UTF-8 encoded JSON file.

        :param file: path of the JSON file
        :return: the decoded JSON value
        """
        with open(file, "r", encoding="utf-8") as fp:
            return json.load(fp)

    def check_load_over(self):
        """Placeholder that currently does nothing.

        :return: None
        """

    @staticmethod
    def _wait_bounds(wait):
        """Normalise the ``wait`` setting into a ``(low, high)`` pair.

        Accepts a bare number, a one-element sequence (fixed delay) or a
        sequence whose first two items bound the delay in either order.

        :param wait: the ``wait`` value from the settings
        :return: tuple ``(low, high)`` with ``low <= high``
        :raises ValueError: if *wait* is an empty sequence
        """
        if isinstance(wait, (int, float)):
            return wait, wait
        if len(wait) == 0:
            raise ValueError("settings 'wait' must contain at least one value")
        if len(wait) == 1:
            return wait[0], wait[0]
        # Tolerate a reversed range, which random.randint would reject.
        low, high = sorted(wait[:2])
        return low, high

    @staticmethod
    def _countdown(seconds):
        """Print a one-line countdown, sleeping one second per tick.

        The countdown ticks from *seconds* down to 0 inclusive, so for an
        integer value the total pause is ``seconds + 1`` seconds.

        :param seconds: starting value of the countdown
        :return: None
        """
        remaining = seconds
        while remaining >= 0:
            print(f"SLEEP : {remaining}", end='\r')
            time.sleep(1)
            remaining -= 1

    def _visit_once(self, url, ua, cookie, incognito, time_out, headless, save):
        """Open a fresh page, load *url*, optionally save it, then close it.

        The page is closed in a ``finally`` clause so it is released even
        when loading or saving raises.

        :return: None
        """
        self.get_page(ua, incognito, time_out, headless, cookie)
        page = self.page
        try:
            print(f"url : {url}")
            page.get(url, retry=3, interval=3)
            if save:
                self.save_result(page.html)
        finally:
            page.quit()

    def process(self):
        """Run the endless visit loop driven by the files in the working directory.

        Ordinary errors in a round are printed and the loop moves on to the
        next round without waiting. ``KeyboardInterrupt`` and ``SystemExit``
        are not caught, so the loop can be stopped.

        :return: does not return normally
        :raises ValueError: if ``url.list`` has no usable lines or the
            ``wait`` setting is empty
        """
        url_list = self.read_file("url.list")
        ua_list = self.read_file("ua.list")
        cookie_list = self.read_file("cookie.list")
        settings = self.read_json_file("settings.json")
        time_out = settings["time_out"]
        wait = settings["wait"]
        save = settings["save"]
        incognito = settings["incognito"]
        headless = settings["headless"]

        # Fail fast on unusable configuration instead of erroring every round.
        if not url_list:
            raise ValueError("url.list contains no URLs")
        low, high = self._wait_bounds(wait)

        while True:
            try:
                ua = random.choice(ua_list) if ua_list else None
                # The chosen cookie line is wrapped in a one-element list
                # before being handed to get_page().
                cookie = [random.choice(cookie_list)] if cookie_list else None
                url = random.choice(url_list)
                self._visit_once(url, ua, cookie, incognito, time_out, headless, save)
                delay = low if low == high else random.randint(low, high)
                self._countdown(delay)
            except Exception:
                traceback.print_exc()
                # taskkill only exists on Windows. NOTE: it force-kills every
                # chrome.exe process, not just the one started here.
                if os.name == "nt":
                    os.system("taskkill /F /IM chrome.exe")
if __name__ == "__main__":
    # Script entry point: build the demo object and start its visit loop.
    DrissionPageDemo().process()
settings.json
{
"wait": [3,5],
"time_out": 60,
"save": true,
"incognito": false,
"headless": false
}
cookie.list(仅供参考)
key1=val1; domain=xxxx
key2=val2
domain=xxxx
ua.list
Mozilla/8.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36
url.list
https://mp.weixin.qq.com/s?__biz=MzU4ODE2NjM2NQ==&mid=2247635836&idx=2&sn=7a81a29ca1d2b6156f199b1c6215b089&chksm=fdec76ceca9bffd81d2c7c189e178ab20ad2d89e6ded9cd1613850a0ef90e735c732d726a423&scene=27
https://mp.weixin.qq.com/s?__biz=MzU4ODE2NjM2NQ==&mid=2247635757&idx=3&sn=3f3f5936bac7603acb5121bdc4dbcb8d&chksm=fdec769fca9bff89a5826cc8c50fbb291a4f7818fecdc2e8db8fb1fe4cd6aacf9da2fb24dd0b&scene=27
https://mp.weixin.qq.com/s?__biz=MzU4ODE2NDI1NQ==&mid=2247487316&idx=1&sn=99d7e3f83b0fa535e7da01313c3acbda&chksm=fde1b2a4ca963bb2a3596d3e43d96748168813d63e3ed89540459dc18978da61744334be4bd8&scene=27
https://mp.weixin.qq.com/s?__biz=MzU4ODE2NDI1NQ==&mid=2247486852&idx=1&sn=74e77bca1a8b27cc33ca06d598e9f57b&chksm=fde1b074ca96396279eb20e1f74b9562a1400b469027458200dbdab5939009947fa90d07474e&scene=27
https://mp.weixin.qq.com/s?__biz=MzU4ODE2OTQxMw==&mid=2247592659&idx=4&sn=416bcd70ffd8564d19e1661ced116dc1&chksm=fde3c379ca944a6ff237a409ac082640cdb5f4569129e725a55573b0061edd181c33bd3da965&scene=27
https://mp.weixin.qq.com/s?__biz=MzU4ODE2NjM2NQ==&mid=2247637851&idx=2&sn=3679c3c11fde6b38ee425e5e57fa13b7&chksm=fdec7ee9ca9bf7ff9bbb88ac87c0984468ec7d9caa9f556261785b1192b22176685e149b4e61&scene=27
https://mp.weixin.qq.com/s?__biz=MzU4ODE2OTQxMw==&mid=2247592680&idx=1&sn=9abc42defc2de487ef60b2a290c5bfbe&chksm=fde3c342ca944a5473338d705cae7d99e1a9bde43788311ddbaefbad8afa67ffe3fd9d7517fd&scene=27
https://mp.weixin.qq.com/s?__biz=MzU4ODE2OTQxMw==&mid=2247592680&idx=4&sn=a116c48fe79d581491a530c1d34d7aa4&chksm=fde3c342ca944a54595fac8de4e103aeed854ac189a0acd9a53a40935c30a9db62dfb06885fb&scene=27
https://mp.weixin.qq.com/s?__biz=MzU4ODE2OTQxMw==&mid=2247592680&idx=3&sn=52e49fc3594f7ec20546bab2358aa3d0&chksm=fde3c342ca944a540199bff93f4f9a8222d9ef6b57f6cf8bfa382ba259e2b180cf31c0e2b715&scene=27
https://mp.weixin.qq.com/s?__biz=MzU4ODE2OTQxMw==&mid=2247592698&idx=5&sn=7c4b6ba23d0fb492945415f892fee38a&chksm=fde3c350ca944a461896ecf52dfd0c6e66c46c2cf175791e9335d561f9608609ea1dfe59d26a&scene=27