【Python】学习笔记

请求头添加引号

import re

# Paste the raw "Name: value" header lines (e.g. copied from the browser's
# developer tools) between the triple quotes.
string = """

"""

# Group 1 lazily captures the header name up to the first ": ";
# group 2 captures the rest of the line as the value.
pattern = '^(.*?): (.*)$'

# Rewrite every line as a quoted dict entry ('name':'value',) and print it.
# Lines that do not match the pattern are printed unchanged.
for i in string.splitlines():
    headers = re.sub(pattern, "'\\1':'\\2',", i)
    print(headers)

selenium 隐藏爬虫特征

from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By


def driverOptions():
    """
    Create a Chrome WebDriver configured to hide common automation markers.

    The stealth script is read before the browser is started, and the browser
    is closed again if the remaining setup fails, so an error never leaves an
    orphaned Chrome process behind.

    :return: the configured ``webdriver.Chrome`` instance with a maximized window
    :raises OSError: if ``config/script/stealth.min.js`` cannot be read
    """
    # Read the stealth script first (https://github.com/kingname/stealth.min.js):
    # a missing file should fail before any browser is launched.
    with open('config/script/stealth.min.js', mode='r', encoding='utf-8') as f:
        js = f.read()

    # Browser configuration object
    options = webdriver.ChromeOptions()
    # Exclude the "enable-automation" switch from the Chrome command line
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Optional, left disabled by the original author:
    # options.add_experimental_option('useAutomationExtension', False)
    # Turn off the "save password" prompt
    prefs = {'credentials_enable_service': False, 'profile.password_manager_enabled': False}
    options.add_experimental_option('prefs', prefs)
    # Extra automation-marker handling for Chrome version 88 or later
    options.add_argument('--disable-blink-features=AutomationControlled')

    # Browser object
    driver = webdriver.Chrome(options=options)
    try:
        # Register the stealth script for every new document; per the original
        # author it removes the window.navigator.webdriver marker.
        driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {'source': js})
        # Maximize the browser window
        driver.maximize_window()
    except Exception:
        # Do not leak the browser process when setup fails part-way.
        driver.quit()
        raise
    return driver


# When run as a script, create the browser instance and bind it to the
# module-level name `chrome`, which the snippets below (scrollBar, login,
# isElementExist) refer to.
if __name__ == '__main__':
    chrome = driverOptions()


selenium 滑动滚动条

def scrollBar(driver=None, pause=0.5):
    """
    Scroll the page down in five steps: 10%, 30%, 50%, 70% and 90% of its height.

    :param driver: WebDriver to scroll; defaults to the module-level ``chrome``
    :param pause: seconds to wait before each scroll step
    :return: None
    """
    import time  # local import: the file has no top-level ``import time``

    if driver is None:
        driver = chrome
    for x in range(1, 10, 2):
        time.sleep(pause)
        # x / 10 is the fraction of the document height to scroll to
        js = 'document.documentElement.scrollTop = document.documentElement.scrollHeight * %f' % (x / 10)
        driver.execute_script(js)

selenium 加载本地 cookie 登录

def login(driver=None):
    """
    Log in to Taobao by loading cookies saved in a local JSON file.

    Opens the site, clears the cookies created by that visit, adds every
    cookie from ``config/cookie/taobao.txt`` and then reloads the page.

    :param driver: WebDriver to use; defaults to the module-level ``chrome``
    :return: None
    :raises OSError: if the cookie file cannot be read
    :raises ValueError: if the cookie file is not valid JSON
    """
    import json  # local import: the file has no top-level ``import json``

    if driver is None:
        driver = chrome
    url = 'https://www.taobao.com/'
    # Open the site before touching its cookies
    driver.get(url=url)
    # Remove the cookies created by that first visit
    driver.delete_all_cookies()
    # Load the saved cookies; the file is expected to hold a JSON list of
    # objects with at least "domain", "name" and "value" keys
    with open('config/cookie/taobao.txt', mode='r', encoding='utf-8') as f:
        cookies = json.load(f)
    for cookie in cookies:
        domain = cookie['domain'].split('.')
        driver.add_cookie(
            {
                # Keep only the last two labels, e.g. "login.taobao.com" -> ".taobao.com"
                'domain': '.' + domain[-2] + '.' + domain[-1],
                'name': cookie['name'],
                'value': cookie['value'],
                'path': '/',
                'expires': None
            }
        )
    # Reload so the page is requested with the restored cookies
    driver.get(url=url)

selenium 判断元素是否存在

def isElementExist(x, driver=None):
    """
    Wait up to 5 seconds for an element to be present and return it.

    :param x: locator string; treated as an XPath when it contains ``//``,
              otherwise as an element id
    :param driver: WebDriver to search; defaults to the module-level ``chrome``
    :return: the located element, or ``False`` when the wait times out or the
             lookup raises a Selenium error
    """
    # Local imports: neither name is imported at the top of the file.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.support.ui import WebDriverWait

    if driver is None:
        driver = chrome
    by = By.XPATH if '//' in x else By.ID
    try:
        # until() returns the element found by the condition, so no second
        # find_element call is needed.
        return WebDriverWait(driver=driver, timeout=5).until(
            EC.presence_of_element_located((by, x))
        )
    except WebDriverException:
        # Covers TimeoutException and other Selenium lookup errors, but no
        # longer hides KeyboardInterrupt or programming errors.
        return False

openpyxl 设置 excel 列宽,行高,插入图片,单元格格式等

  1. 自适应列宽 = (字符数*(字符宽度+间距)+边距)*0.125+0.62
  2. 列宽 = 像素*0.125+0.62
  3. 行高 = 像素*0.75
from openpyxl import load_workbook
from openpyxl.styles import Alignment
import requests
from openpyxl.drawing.image import Image
from PIL import Image as image
from io import BytesIO

# Open the workbook and work on its active sheet
wb = load_workbook('Excel.xlsx')
ws = wb.active
# Width of column A: 10 pixels converted with pixels * 0.125 + 0.62
ws.column_dimensions['A'].width = 10 * 0.125 + 0.62
# Height of row 1: 10 pixels converted with pixels * 0.75
ws.row_dimensions[1].height = 10 * 0.75
# Cell alignment: centered horizontally and vertically, with text wrapping
align = Alignment(horizontal='center', vertical='center', wrap_text=True)
for col in ws.columns:
    for cell in col:
        cell.alignment = align
# Request headers
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
}
# requests.get takes the address as ``url``; the string below is a
# placeholder for the real image link.
response = requests.get(url='图片链接', headers=headers, timeout=10)
# Fail here on an HTTP error instead of handing an error page to PIL
response.raise_for_status()
img = image.open(BytesIO(response.content))
img = Image(img)
img.width, img.height = 100, 100
# Insert the image anchored at cell A1
ws.add_image(img, 'A1')
wb.save('Excel.xlsx')
wb.close()

屏蔽Pandas读取非 Microsoft 创建的Excel错误警告

import pandas as pd
import warnings


# Capture (rather than print) any warnings raised while reading the workbook.
with warnings.catch_warnings(record=True):
    warnings.simplefilter("always")
    # Read the data
    df1 = pd.read_excel(filePath)
# Installed after the block: catch_warnings restores the previous filters on
# exit, so a filter meant to outlive the read has to be set out here.
warnings.simplefilter('ignore', ResourceWarning)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值