Python 请求中的 JA3 指纹问题

采集一个网站时,无论怎样请求都返回空数据(实际上是有数据的),但抓包看到的结果又确实如此。请教了一些人,推测是指纹验证;拜读了网上其他大佬的博客文章并实验之后,发现确实是这个问题!
第一次知道 TLS 握手还有这个东西(JA3 指纹),让我大受震撼,特此搬运一下。

参考链接及来源:
Python 爬虫进阶必备 | JA3 指纹在爬虫中的应用与定向突破
python爬虫 requests、httpx、aiohttp、scrapy突破ja3指纹识别

实例:

requests

from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.ssl_ import create_urllib3_context
import requests
import random
# Colon-separated OpenSSL cipher-string entries. DESAdapter shuffles their
# order so that each adapter instance offers its ciphers in a different order.
ORIGIN_CIPHERS = ('ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
                  'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES')


class DESAdapter(HTTPAdapter):
    """Transport adapter whose SSL contexts use a randomly ordered cipher string.

    The order of the ORIGIN_CIPHERS entries is shuffled once, when the adapter
    is constructed, and stored (with the fixed ``!aNULL:!eNULL:!MD5``
    exclusions appended) in ``self.CIPHERS``. Every pool manager and proxy
    manager created by this adapter gets an SSL context built from that string.
    """

    def __init__(self, *args, **kwargs):
        shuffled = ORIGIN_CIPHERS.split(':')
        random.shuffle(shuffled)
        # Assigned before super().__init__() on purpose: requests' HTTPAdapter
        # constructor builds its pool manager, and init_poolmanager() below
        # reads self.CIPHERS.
        self.CIPHERS = ':'.join(shuffled) + ':!aNULL:!eNULL:!MD5'
        super().__init__(*args, **kwargs)

    def init_poolmanager(self, *args, **kwargs):
        """Create the pool manager with an SSL context built from ``self.CIPHERS``."""
        kwargs['ssl_context'] = create_urllib3_context(ciphers=self.CIPHERS)
        return super().init_poolmanager(*args, **kwargs)

    def proxy_manager_for(self, *args, **kwargs):
        """Create the proxy manager with an SSL context built from ``self.CIPHERS``."""
        kwargs['ssl_context'] = create_urllib3_context(ciphers=self.CIPHERS)
        return super().proxy_manager_for(*args, **kwargs)

# Demo: send one request through DESAdapter and print the JSON reply.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 Edg/92.0.902.67'}
s = requests.Session()
s.headers.update(headers)
# Only URLs starting with this prefix are routed through the adapter.
s.mount('https://ja3er.com', DESAdapter())
# Performs a network request; .json() parses the response body as JSON.
resp = s.get('https://ja3er.com/json').json()
print(resp)

aiohttp

import random
import ssl
import asyncio
import aiohttp

# ssl._create_default_https_context = ssl._create_unverified_context


# Colon-separated OpenSSL cipher-string entries; SSLFactory reshuffles their
# order every time it builds a context.
ORIGIN_CIPHERS = ('ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
                  'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES')


class SSLFactory:
    """Callable that returns a default SSL context with a reshuffled cipher order."""

    def __init__(self):
        # Kept as a list so it can be shuffled in place on every call.
        self.ciphers = ORIGIN_CIPHERS.split(":")

    def __call__(self) -> ssl.SSLContext:
        """Shuffle ``self.ciphers`` in place and return a context configured with them.

        The fixed exclusions ``!aNULL:!eNULL:!MD5`` are appended after the
        shuffled entries.
        """
        random.shuffle(self.ciphers)
        cipher_string = f'{":".join(self.ciphers)}:!aNULL:!eNULL:!MD5'
        ctx = ssl.create_default_context()
        ctx.set_ciphers(cipher_string)
        return ctx


sslgen = SSLFactory()


async def main():
    """Fetch https://ja3er.com/json through aiohttp and print the decoded JSON.

    ``sslgen()`` is called for the request, so the request uses a newly built
    SSL context with a freshly shuffled cipher order.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get("https://ja3er.com/json", headers={}, ssl=sslgen()) as resp:
            data = await resp.json()
            print(data)


# asyncio.run() creates, runs and closes its own event loop. Calling
# asyncio.get_event_loop() with no running loop is deprecated in recent Python
# versions, so the script uses asyncio.run() instead.
asyncio.run(main())

httpx:

异步模式:
import httpx
import asyncio
import random
import ssl

# Colon-separated OpenSSL cipher-string entries; SSLFactory reshuffles their
# order every time it builds a context.
ORIGIN_CIPHERS = ('ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
                  'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES')


class SSLFactory:
    """Callable that returns a default SSL context with a reshuffled cipher order."""

    def __init__(self):
        # Kept as a list so it can be shuffled in place on every call.
        self.ciphers = ORIGIN_CIPHERS.split(":")

    def __call__(self) -> ssl.SSLContext:
        """Shuffle ``self.ciphers`` in place and return a context configured with them.

        The fixed exclusions ``!aNULL:!eNULL:!MD5`` are appended after the
        shuffled entries.
        """
        random.shuffle(self.ciphers)
        cipher_string = f'{":".join(self.ciphers)}:!aNULL:!eNULL:!MD5'
        ctx = ssl.create_default_context()
        ctx.set_ciphers(cipher_string)
        return ctx


sslgen = SSLFactory()


async def main():
    """Fetch https://ja3er.com/json with httpx.AsyncClient and print the decoded JSON.

    The client is constructed with an SSL context obtained from ``sslgen()``.
    """
    tls_context = sslgen()
    async with httpx.AsyncClient(verify=tls_context) as client:
        response = await client.get('https://ja3er.com/json')
        payload = response.json()
        print(payload)


asyncio.run(main())
同步模式:
import httpx
import asyncio
import random
import ssl

# Colon-separated OpenSSL cipher-string entries; SSLFactory reshuffles their
# order every time it builds a context.
ORIGIN_CIPHERS = ('ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
                  'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES')


class SSLFactory:
    """Callable that returns a default SSL context with a reshuffled cipher order."""

    def __init__(self):
        # Kept as a list so it can be shuffled in place on every call.
        self.ciphers = ORIGIN_CIPHERS.split(":")

    def __call__(self) -> ssl.SSLContext:
        """Shuffle ``self.ciphers`` in place and return a context configured with them.

        The fixed exclusions ``!aNULL:!eNULL:!MD5`` are appended after the
        shuffled entries.
        """
        random.shuffle(self.ciphers)
        cipher_string = f'{":".join(self.ciphers)}:!aNULL:!eNULL:!MD5'
        ctx = ssl.create_default_context()
        ctx.set_ciphers(cipher_string)
        return ctx


# Demo: one synchronous httpx request using an SSL context from SSLFactory.
sslgen = SSLFactory()
# http2=True needs httpx's optional HTTP/2 support (pip install "httpx[http2]").
# verify= receives the ssl.SSLContext returned by sslgen().
with httpx.Client(headers={}, http2=True, verify=sslgen()) as client:
    response = client.get('https://ja3er.com/json')
    print(response.text)

scrapy

class MyHTTPDownloadHandler(HTTPDownloadHandler):
    """Download handler that swaps in a reshuffled cipher string before every request.

    NOTE(review): this snippet does not show its imports; ``random``,
    ``HTTPDownloadHandler`` and ``ScrapyClientContextFactory`` must be in scope.
    """

    def shuffle_ciphers(self):
        """Return the cipher entries in random order, followed by the fixed exclusions.

        Also (re)assigns ``self.ORIGIN_CIPHERS`` on every call.
        """
        self.ORIGIN_CIPHERS = ('ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
                               'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES')
        entries = self.ORIGIN_CIPHERS.split(':')
        random.shuffle(entries)
        return ':'.join(entries) + ':!aNULL:!eNULL:!MD5'

    def download_request(self, request, spider):
        """Replace the context factory with one using fresh ciphers, then delegate to the parent."""
        self._contextFactory = ScrapyClientContextFactory(tls_ciphers=self.shuffle_ciphers())
        return super().download_request(request, spider)

爬虫配置文件

# Spider settings: throttling and retry limits, plus the custom download
# handler registered for both the http and https schemes.
custom_settings = {
    "CONCURRENT_REQUESTS": 5,
    "DOWNLOAD_DELAY": 1,
    "DOWNLOAD_TIMEOUT": 10,
    "RETRY_TIMES": 3,
    # Both schemes map to the same handler class path.
    "DOWNLOAD_HANDLERS": dict.fromkeys(
        ("http", "https"),
        "scrapy_project.middlewares.MyHTTPDownloadHandler",
    ),
}
### 使用Python生成或解析JA3指纹

#### 什么是JA3指纹

JA3是一种用于识别TLS客户端配置的独特字符串表示形式。通过捕获并分析这些特征,可以有效地追踪特定设备或应用程序发出的网络请求[^1]。

#### 如何安装依赖库

为了实现这一功能,在Python环境中需先安装必要的第三方模块`pyja3`以及其依赖项:

```bash
pip install pyja3
```

此命令会自动处理所有必需组件的下载与设置工作。

#### 生成JA3指纹实例

下面展示了一段简单的代码片段来说明怎样利用上述工具包创建一个新的SSL/TLS连接,并从中提取对应的JA3哈希值及其可读版本描述:

```python
import ssl
from ja3 import get_ja3_string, get_hash

context = ssl.create_default_context()
with socket.create_connection(("example.com", 443)) as sock:
    with context.wrap_socket(sock, server_hostname="example.com") as ssock:
        tls_info = {
            'version': ssock.version(),
            'cipher': ssock.cipher()[0],
            'extensions': [],
            'elliptic_curves': [],
            'ec_point_fmts': []
        }
        # 获取完整的JA3字符串表达式
        ja3_str = get_ja3_string(tls_info)
        # 计算MD5摘要作为紧凑型标识符
        ja3_md5 = get_hash(ja3_str)
        print(f"JA3 String: {ja3_str}")
        print(f"JA3 Hash: {ja3_md5}")
```

这段脚本模拟了一个典型的HTTPS握手过程,并收集有关所使用的协议版本、加密套件以及其他参数的信息,进而构建出相应的JA3签名。

#### 解析现有JA3指纹数据

对于已经获取到的手动输入或者其他途径得到的JA3字符串,可以通过调用专门的方法来进行反向工程,恢复原始的安全协商细节:

```python
def parse_ja3(ja3_string):
    parts = ja3_string.split(",")
    result = {}
    try:
        result['ssl_version'] = int(parts[0])
        result['accept_cipher_suites'] = list(map(int, parts[1].split("-")))
        result['order_of_ec_points_formats'] = list(map(int, parts[-1].split("-"))) if len(parts)>2 else None
        return result
    except Exception as e:
        print(e)
        return {}

parsed_data = parse_ja3("769,49195-49199-52393-49196-49200,,0")
for key,value in parsed_data.items():
    print(key+": "+str(value))
```

该函数接受标准格式化的JA3文本串作为参数,将其分割成各个组成部分后返回字典对象以便进一步操作和理解。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值