# Version 1 (第一版)
import functools
import json
import pprint
import re
import time
import traceback
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from tqdm import tqdm
# Silence urllib3's InsecureRequestWarning: the request helpers in this file
# call requests with verify=False, which would otherwise warn on every call.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Browser-like request headers used by the helpers in this file whenever the
# caller does not pass its own ``headers`` dict.
# NOTE(review): the User-Agent value contains no spaces; presumably it was
# produced by a tool that strips all spaces from header text - confirm that
# the target servers accept it in this form.
default_headers = {
'User-Agent': 'Mozilla/5.0(WindowsNT10.0;Win64;x64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/92.0.4515.131Safari/537.36',
'sec-ch-ua': '"Chromium";v="92","NotA;Brand";v="99","GoogleChrome";v="92"',
'sec-ch-ua-mobile': '?0'}
def tryError(fn):
    """
    Decorator that runs ``fn`` inside a try/except and reports any failure.

    When ``fn`` raises an ``Exception`` the wrapper prints a short report
    (exception class docstring, exception message, function name and function
    docstring), prints the traceback, and returns ``None`` instead of
    propagating the exception.

    :param fn: the callable to protect
    :return: a wrapper with the same signature and metadata as ``fn``
    """
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except Exception as e:
            # Not every callable has __name__ (e.g. functools.partial objects).
            fn_name = getattr(fn, '__name__', repr(fn))
            print("=========================")
            msg = f'出错原因: {e.__doc__} {e} \n函数名: {fn_name}\n函数说明: {fn.__doc__}'
            print(msg)
            print("=========================")
            # Short pause before the traceback is written to stderr;
            # presumably this keeps the stdout report and the stderr
            # traceback from interleaving in some consoles.
            time.sleep(0.2)
            traceback.print_exc()
            # Best-effort contract: callers receive None when fn failed.
            return None
    return wrapper
@tryError
def getText(url: str, headers: dict = None, params: dict = None) -> str:
    """
    Send an HTTP GET request and return the response body as text.

    TLS certificate verification is disabled (``verify=False``). Because the
    function is wrapped by ``tryError``, any exception is reported on the
    console and ``None`` is returned instead.

    :param url: address to request
    :param headers: request headers; ``default_headers`` is used when empty
    :param params: query-string parameters
    :return: the decoded response text
    """
    # Work on a copy so that neither the caller's dict nor the module-level
    # default_headers is modified by the two assignments below.
    t_headers = dict(headers if headers else default_headers)
    t_headers['Accept-Encoding'] = 'utf-8'
    t_headers['Accept-Language'] = 'zh-CN,zh;q=0.9'
    with requests.session() as session, session.get(url=url, headers=t_headers, verify=False, params=params) as resp:
        # Decode with the encoding detected from the body rather than the
        # one derived from the response headers.
        resp.encoding = resp.apparent_encoding
        return resp.text
@tryError
def postText(url: str, headers: dict = None, datas: dict = None) -> str:
    """
    Send an HTTP POST request and return the response body as text.

    TLS certificate verification is disabled (``verify=False``). Because the
    function is wrapped by ``tryError``, any exception is reported on the
    console and ``None`` is returned instead.

    :param url: address to request
    :param headers: request headers; ``default_headers`` is used when empty
    :param datas: form data sent as the request body
    :return: the decoded response text
    """
    # Work on a copy so that neither the caller's dict nor the module-level
    # default_headers is modified by the two assignments below.
    t_headers = dict(headers if headers else default_headers)
    t_headers['Accept-Encoding'] = 'utf-8'
    t_headers['Accept-Language'] = 'zh-CN,zh;q=0.9'
    with requests.session() as session, session.post(url=url, headers=t_headers, verify=False, data=datas) as resp:
        # Decode with the encoding detected from the body rather than the
        # one derived from the response headers.
        resp.encoding = resp.apparent_encoding
        return resp.text
@tryError
def getJson(url: str, headers: dict = None, params: dict = None) -> dict:
    """
    Send an HTTP GET request and return the response body parsed as JSON.

    The body is fetched with ``getText``. Because the function is wrapped by
    ``tryError``, a parse error is reported on the console and ``None`` is
    returned instead.

    :param url: address to request
    :param headers: request headers; ``default_headers`` is used when empty
    :param params: query-string parameters
    :return: the decoded JSON value, or ``None`` when the fetch failed
    """
    # Work on a copy so that neither the caller's dict nor the module-level
    # default_headers is modified by the two assignments below.
    t_headers = dict(headers if headers else default_headers)
    t_headers['Accept-Encoding'] = 'utf-8'
    t_headers['Accept-Language'] = 'zh-CN,zh;q=0.9'
    text = getText(url=url, headers=t_headers, params=params)
    if text is None:
        # getText already reported its own failure; avoid a second,
        # misleading TypeError from json.loads(None).
        return None
    return json.loads(text)
@tryError
def writeBin(filePath: str, url: str, headers: dict = None) -> None:
    """
    Download a binary resource and save it to a local file.

    TLS certificate verification is disabled (``verify=False``). The HTTP
    status code is not checked, so the body of an error response is written
    to the file as well. Because the function is wrapped by ``tryError``,
    any exception is reported on the console instead of being raised.

    :param filePath: destination path; an existing file is overwritten
    :param url: address of the resource
    :param headers: request headers; ``default_headers`` is used when empty
    :return: None
    """
    t_headers = headers if headers else default_headers
    with requests.session() as session, session.get(url=url, headers=t_headers, stream=True, verify=False) as resp:
        with open(filePath, 'wb') as wf:
            # The request is made with stream=True, so write it chunk by
            # chunk instead of loading the whole body into memory first.
            for chunk in resp.iter_content(chunk_size=8192):
                if chunk:
                    wf.write(chunk)
    print(f'{filePath} down ok !')
@tryError
def writeText(filePath: str, url: str, headers: dict = None) -> None:
    """
    Download a text resource and save it to a local UTF-8 file.

    TLS certificate verification is disabled (``verify=False``). Because the
    function is wrapped by ``tryError``, any exception is reported on the
    console instead of being raised.

    :param filePath: destination path; an existing file is overwritten
    :param url: address of the resource
    :param headers: request headers; ``default_headers`` is used when empty
    :return: None
    """
    # Work on a copy so that neither the caller's dict nor the module-level
    # default_headers is modified by the two assignments below.
    t_headers = dict(headers if headers else default_headers)
    t_headers['Accept-Encoding'] = 'utf-8'
    t_headers['Accept-Language'] = 'zh-CN,zh;q=0.9'
    with requests.session() as session, session.get(url=url, headers=t_headers, stream=True, verify=False) as resp:
        with open(filePath, 'w', encoding='utf-8') as wf:
            wf.write(resp.text)
    print(f'{filePath} down ok !')
@tryError
def writeBinBar(filePath: str, url: str, headers: dict = None) -> None:
    """
    Download a binary resource to a local file while showing a progress bar.

    TLS certificate verification is disabled (``verify=False``). Because the
    function is wrapped by ``tryError``, any exception is reported on the
    console instead of being raised.

    :param filePath: destination path; an existing file is overwritten
    :param url: address of the resource
    :param headers: request headers; ``default_headers`` is used when empty
    :return: None
    """
    t_headers = headers if headers else default_headers
    with requests.session() as session, session.get(url=url, headers=t_headers, stream=True, verify=False) as resp:
        # Not every response carries Content-Length. Fall back to an unknown
        # total (None) so the download still runs; tqdm then shows a running
        # byte count without a percentage.
        fileSize = int(resp.headers.get('Content-Length', 0)) or None
        with tqdm(initial=0, total=fileSize, unit_scale=True, unit='B') as pbar:
            with open(filePath, 'wb') as wf:
                for chunk in resp.iter_content(chunk_size=1024):
                    if chunk:
                        wf.write(chunk)
                        pbar.update(len(chunk))
    print(f'{filePath} down ok !')
@tryError
def getHeadersKV(pstr=''):
    """
    Convert raw ``Name: value`` header lines into a dict and pretty-print it.

    The text is taken from ``pstr`` or, when ``pstr`` is empty, read from the
    file ``getKV.txt`` in the current working directory. Blank lines are
    ignored. Each remaining line is split at its first separator colon;
    surrounding whitespace is removed from name and value, but spaces inside
    the value are preserved.

    :param pstr: raw header text; empty string means "read getKV.txt"
    :return: the parsed headers as a dict (also printed as ``headers={...}``)
    """
    filePath = 'getKV.txt'
    if pstr == '':
        with open(filePath, 'r', encoding='utf-8') as rf:
            string = rf.read()
    else:
        string = pstr
    headers = {}
    for line in string.splitlines():
        line = line.strip()
        if not line:
            continue
        # Start searching at index 1 so that names which themselves begin
        # with a colon (e.g. ":authority") keep their leading colon.
        sep = line.find(':', 1)
        if sep == -1:
            # No separator on this line: keep the name with an empty value.
            key, value = line, ''
        else:
            key, value = line[:sep], line[sep + 1:]
        headers[key.strip()] = value.strip()
    print("headers=", end='')
    pprint.pprint(headers)
    return headers
def removeBlank(pstr: str) -> str:
    """
    Return ``pstr`` with all whitespace characters removed.

    Uses the regular expression ``\\s+``, so spaces, tabs and line breaks are
    all dropped.

    :param pstr: input text
    :return: the text without any whitespace
    """
    # re.S only changes how "." matches, so it has no effect on "\s+";
    # re.sub also accepts the pattern string directly.
    return re.sub(r'\s+', '', pstr)
@tryError
def getListByRe(restr: str, url: str, headers: dict = None) -> list[list]:
    """
    Fetch a page and extract every match of a regular expression from it.

    The page text is fetched with ``getText`` and all whitespace is removed
    (``removeBlank``) before matching, so ``restr`` must be written against
    whitespace-free text. ``restr`` is compiled with ``re.S``.

    :param restr: regular expression containing named groups ``(?P<name>...)``
    :param url: address of the page
    :param headers: request headers passed through to ``getText``
    :return: one list per match, holding the values of the named groups in
             the order the groups appear in ``restr``; ``None`` when the page
             could not be fetched
    """
    getHtml = getText(url=url, headers=headers)
    if getHtml is None:
        # getText already reported its own failure; avoid a second,
        # misleading TypeError from the regex call on None.
        return None
    getHtml = removeBlank(getHtml)
    pattern = re.compile(restr, re.S)
    # Ask the compiled pattern for its named groups instead of scraping the
    # pattern text; sort by group number to keep the order of appearance.
    names = sorted(pattern.groupindex, key=pattern.groupindex.get)
    print(names)
    return [[item.group(name) for name in names] for item in pattern.finditer(getHtml)]
@tryError
def getSingleByRe(restr: str, url: str, headers: dict = None) -> list:
    """
    Fetch a page and extract the first match of a regular expression from it.

    The page text is fetched with ``getText`` and all whitespace is removed
    (``removeBlank``) before matching, so ``restr`` must be written against
    whitespace-free text. ``restr`` is compiled with ``re.S``.

    :param restr: regular expression containing named groups ``(?P<name>...)``
    :param url: address of the page
    :param headers: request headers passed through to ``getText``
    :return: the values of the named groups of the first match, in the order
             the groups appear in ``restr``; ``None`` when the page could not
             be fetched or nothing matched (the failure is reported on the
             console by ``tryError``)
    """
    getHtml = getText(url=url, headers=headers)
    if getHtml is None:
        # getText already reported its own failure; avoid a second,
        # misleading TypeError from the regex call on None.
        return None
    getHtml = removeBlank(getHtml)
    pattern = re.compile(restr, re.S)
    # Ask the compiled pattern for its named groups instead of scraping the
    # pattern text; sort by group number to keep the order of appearance.
    names = sorted(pattern.groupindex, key=pattern.groupindex.get)
    search = pattern.search(getHtml)
    if search is None:
        # Raise a descriptive error (reported by tryError) rather than
        # failing later with an AttributeError on None.
        raise ValueError(f'pattern {restr!r} did not match the content of {url}')
    return [search.group(name) for name in names]
if __name__ == '__main__':
    # Script entry point: with no argument getHeadersKV reads the header
    # text from getKV.txt and prints it as a dict.
    getHeadersKV()
# Version 2 (第二版)
import datetime
import functools
import json