请求头添加引号
import re
# Raw request-header text, one "Name: value" pair per line, goes between the
# triple quotes.
string = """
"""
# Group 1 is the header name (lazy, so it ends at the first ": ");
# group 2 is everything after that separator.
pattern = r'^(.*?): (.*)$'
# Compile once instead of handing the pattern text to ``re.sub`` on every line.
_header_re = re.compile(pattern)
# Rewrites a matching line as 'name':'value', (with the trailing comma);
# lines without ": " are printed unchanged.
_quoted = r"'\1':'\2',"
for i in string.splitlines():
    headers = _header_re.sub(_quoted, i)
    print(headers)
selenium 隐藏爬虫特征
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
def driverOptions():
    """
    Start a Chrome WebDriver configured to hide common automation markers.

    The stealth script is read before the browser is launched, and the browser
    is quit again if any post-launch step fails, so an error never leaves a
    stray Chrome process behind.

    :return: the started, maximised ``webdriver.Chrome`` instance
    :raises OSError: if ``config/script/stealth.min.js`` cannot be read
    """
    # Read the script first: failing here must not orphan a browser process.
    with open('config/script/stealth.min.js', mode='r', encoding='utf-8') as f:
        js = f.read()

    options = webdriver.ChromeOptions()
    # Exclude the "enable-automation" switch from Chrome's start-up switches.
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Disable the credential service and the password manager via profile prefs.
    prefs = {'credentials_enable_service': False, 'profile.password_manager_enabled': False}
    options.add_experimental_option('prefs', prefs)
    options.add_argument('--disable-blink-features=AutomationControlled')

    driver = webdriver.Chrome(options=options)
    try:
        # Register the stealth script through the DevTools command
        # Page.addScriptToEvaluateOnNewDocument.
        driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {'source': js})
        driver.maximize_window()
    except Exception:
        # Do not leak the browser when set-up fails half-way; re-raise as is.
        driver.quit()
        raise
    return driver
if __name__ == "__main__":
    # Launch the browser only when the file is run as a script; the driver is
    # bound to the module-level name ``chrome``.
    chrome = driverOptions()
selenium 滑动滚动条
def scrollBar(driver=None, pause=0.5):
    """
    Scroll the page down in five steps: 10 %, 30 %, 50 %, 70 % and 90 % of
    the document's scroll height.

    :param driver: object exposing ``execute_script``; when omitted, the
        module-level ``chrome`` driver is used
    :param pause: seconds to sleep before each scroll step (default 0.5)
    :return: None
    """
    # Local import: the visible part of the file never imports ``time``.
    import time

    target = chrome if driver is None else driver
    for x in range(1, 10, 2):
        time.sleep(pause)
        j = x / 10
        js = 'document.documentElement.scrollTop = document.documentElement.scrollHeight * %f' % j
        target.execute_script(js)
selenium 加载本地 cookie 登录
def login():
    """
    Log in to Taobao by replaying cookies stored in a local JSON file.

    Opens the Taobao home page with the module-level ``chrome`` driver, clears
    its cookies, adds every cookie listed in ``config/cookie/taobao.txt`` and
    then loads the page again.

    :return: None
    :raises OSError: if the cookie file cannot be read
    :raises json.JSONDecodeError: if the cookie file is not valid JSON
    :raises KeyError: if a cookie entry lacks ``domain``, ``name`` or ``value``
    """
    # Local import: the visible part of the file never imports ``json``.
    import json

    url = 'https://www.taobao.com/'
    # Selenium only accepts cookies for the domain of the currently loaded
    # page, hence the navigation before any cookie is added.
    chrome.get(url=url)
    chrome.delete_all_cookies()
    with open('config/cookie/taobao.txt', mode='r', encoding='utf-8') as f:
        cookies = json.load(f)
    for cookie in cookies:
        # Keep at most the last two labels of the stored domain, e.g.
        # "login.taobao.com" -> ".taobao.com". Slicing (rather than indexing
        # [-2]) keeps a single-label domain from raising IndexError.
        labels = cookie['domain'].split('.')
        chrome.add_cookie(
            {
                'domain': '.' + '.'.join(labels[-2:]),
                'name': cookie['name'],
                'value': cookie['value'],
                'path': '/',
                # NOTE(review): WebDriver's cookie field is named 'expiry';
                # 'expires' is kept exactly as in the original - confirm intent.
                'expires': None
            }
        )
    # Reload so the page is requested with the restored cookies.
    chrome.get(url=url)
selenium 判断元素是否存在
def isElementExist(x):
    """
    Wait up to 5 seconds for an element to be present and return it.

    :param x: locator string; treated as an XPath when it contains ``//``,
        otherwise as an element id
    :return: the located element, or ``False`` when the wait times out or
        Selenium raises any other WebDriver error
    """
    # Local imports: the visible part of the file imports neither name.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.support.ui import WebDriverWait

    by = By.XPATH if '//' in x else By.ID
    try:
        # ``until`` returns whatever the condition returns, i.e. the element
        # itself, so no second ``find_element`` lookup is needed.
        return WebDriverWait(driver=chrome, timeout=5).until(
            EC.presence_of_element_located((by, x))
        )
    except WebDriverException:
        # Covers TimeoutException and every other Selenium error, but no longer
        # swallows KeyboardInterrupt/SystemExit the way a bare ``except`` did.
        return False
openpyxl 设置 excel 列宽,行高,插入图片,单元格格式等
- 自适应列宽 = (字符数*(字符宽度+间距)+边距)*0.125+0.62
- 列宽 = 像素*0.125+0.62
- 行高 = 像素*0.75
from openpyxl import load_workbook
from openpyxl.styles import Alignment
import requests
from openpyxl.drawing.image import Image
from PIL import Image as image
from io import BytesIO
# Open the workbook and work on its active sheet.
wb = load_workbook('Excel.xlsx')
ws = wb.active

# Column A width: 10 px converted with the notes' rule width = px * 0.125 + 0.62.
ws.column_dimensions['A'].width = 10 * 0.125 + 0.62
# Row 1 height: presumably 10 px converted to points (px * 0.75) - confirm.
ws.row_dimensions[1].height = 10 * 0.75

# Centre every cell horizontally and vertically, with text wrapping enabled.
align = Alignment(horizontal='center', vertical='center', wrap_text=True)
for col in ws.columns:
    for cell in col:
        cell.alignment = align

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
}
# '图片链接' is a placeholder for the real image URL.
# The keyword must be ``url``; the original ``ur=`` raised TypeError.
response = requests.get(url='图片链接', headers=headers, timeout=10)
# Fail here on an HTTP error instead of handing an error page to PIL.
response.raise_for_status()

# Decode the downloaded bytes with PIL, wrap the result for openpyxl,
# set its size to 100 x 100 and anchor it at cell A1.
img = image.open(BytesIO(response.content))
img = Image(img)
img.width, img.height = 100, 100
ws.add_image(img, 'A1')

wb.save('Excel.xlsx')
wb.close()
屏蔽Pandas读取非 Microsoft 创建的Excel错误警告
import pandas as pd
import warnings
# Record (rather than print) every warning raised while the workbook is read;
# the recorded list is not bound to a name, so those warnings are discarded.
with warnings.catch_warnings(record=True):
    warnings.simplefilter("always")
    # NOTE(review): filePath is not defined in the visible code - confirm
    # where it comes from.
    df1 = pd.read_excel(filePath)
# NOTE(review): the original indentation was lost; this line is assumed to sit
# outside the ``with`` block - confirm.
warnings.simplefilter('ignore', ResourceWarning)