【python】采用pytest夹具的方法,用selenium打开闲鱼,通过捕获日志的方式获取接口的响应并对响应数据进行处理,获取你搜索的商品的最新信息,并通过邮件通知你所关注商品的更新信息。

1、driver配置设置

采用了无头模式,并模拟用户在浏览器中的行为,来躲过闲鱼网站的反爬机制。设置chrome的能力来启用CDP,设置capabilities日志捕获,来获取点击操作后的请求响应。创建实例

def driver():
    """Yield a headless Chrome WebDriver already opened on the Goofish home page.

    The function is a generator so that it can serve as the pytest fixture
    named ``driver`` requested by the test class: everything before ``yield``
    is setup, everything after it is teardown.

    Yields:
        The ``webdriver.Chrome`` instance. Performance logging is enabled so
        that network events can later be read via ``get_log('performance')``.
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    chrome_options = Options()
    # NOTE(review): presumably meant to stop Chrome from loading images;
    # confirm that this preference key is still honoured by current Chrome.
    chrome_options.add_experimental_option("prefs", {"profile.default_content_settings": {"images": 2}})
    # add_experimental_option() keeps a single value per option name, so both
    # switches have to be passed in one call; with two separate calls only the
    # list from the last call would remain in effect.
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    chrome_options.add_experimental_option("useAutomationExtension", False)

    for argument in (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--window-size=1920,1080",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    ):
        chrome_options.add_argument(argument)

    # Record DevTools "performance" events, which include the network traffic.
    chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

    browser = webdriver.Chrome(options=chrome_options)
    try:
        browser.get('https://www.goofish.com/')
        yield browser
    finally:
        # Runs even when the initial page load fails, so no browser is leaked.
        browser.quit()

2、自动化主体,引用实例,根据用户输入的商品名称,在页面搜索,将页面操作产生的请求响应全部捕捉,并提取精确url目标的信息,将获取到的信息处理后提取对应的商品信息,首次运行会直接邮件通知,后面每次循环只有当商品数量比上次多时才会邮件通知。

@pytest.mark.usefixtures("driver")
class Testxianyu1:
    """Search Goofish for a product and e-mail the listings not reported before.

    The class deliberately has no ``__init__``: pytest does not collect test
    classes that define one, and it creates a fresh instance for every test,
    so per-instance state would not survive from one run to the next.
    """

    # Shared notifier, created on first use, so the list of already reported
    # items lives as long as the class object itself.
    _email_notifier = None

    @property
    def email_notifier(self):
        """Return the class-wide EmailNotifier, creating it on first access."""
        owner = type(self)
        if owner._email_notifier is None:
            owner._email_notifier = EmailNotifier()
        return owner._email_notifier

    @pytest.mark.parametrize("name", [("努比亚阿尔法手表")])
    def test_xianyushangping(self, driver, name):
        """Search for *name*, read the search API response and mail new items.

        Args:
            driver: WebDriver supplied by the ``driver`` fixture.
            name: Product name typed into the search box.
        """
        print(f"开始搜索 {name}...")

        search_input = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, ".search-input--WY2l9QD3"))
        )
        search_input.click()
        search_input.send_keys(name + Keys.ENTER)

        checkbox = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR,
                                              ".search-checkbox-item-container--DsTIZUle:nth-child(2) > .search-checkbox--fULWOSyM"))
        )
        checkbox.click()

        response_bodies = self.capture_all_network_responses(
            driver, target_url_partial="mtop.taobao.idlemtopsearch.pc.search")
        if not response_bodies:
            return

        relevant_response = self.extract_relevant_response(response_bodies)
        if not relevant_response:
            return

        filtered_items = self.filter_items(self.process_data(relevant_response))

        # Decide by content rather than by count: a listing that replaces
        # another one keeps the total unchanged but is still new.
        already_reported = self.email_notifier.previous_filtered_items
        new_items = [item for item in filtered_items if item not in already_reported]
        if new_items:
            self.email_notifier.send_email(new_items)
            print("发送邮件成功。")

3、capture_all_network_responses捕获全部的响应信息,如果捕获异常会输出对应提示,并对信息进行深度处理,extract_relevant_response方法根据url精确匹配对应响应数据,并使用我们需要的参数进行辅助识别,process_data方法,提取我们需要的信息字段,并拼接、排序、储存

    def capture_all_network_responses(self, driver, target_url_partial):
        """Collect the bodies of JSON responses whose URL contains a given fragment.

        Args:
            driver: Object offering ``get_log('performance')`` and
                ``execute_cdp_cmd``.
            target_url_partial: Substring that the response URL must contain.

        Returns:
            List of non-empty response body strings, in log order.
        """
        entries = driver.get_log('performance')
        collected = []
        print("开始捕获网络响应...")  # debug output

        for entry in entries:
            event = json.loads(entry['message'])['message']
            if event['method'] != 'Network.responseReceived':
                continue

            response_info = event.get('params', {}).get('response', {})
            url = response_info.get('url', '')
            mime_type = response_info.get('mimeType', '')
            if target_url_partial not in url or 'application/json' not in mime_type:
                continue

            request_id = event['params']['requestId']
            try:
                body = driver.execute_cdp_cmd(
                    'Network.getResponseBody', {'requestId': request_id}
                ).get('body', '')
            except Exception as e:
                # This particular message is deliberately not printed; any
                # other failure is reported and the entry is skipped.
                if 'No resource with given identifier found' not in str(e):
                    print(f"异常捕获: {str(e)}")
                continue

            if body:
                collected.append(body)

        print(f"捕获到 {len(collected)} 个响应")  # debug output
        return collected

    def extract_relevant_response(self, response_bodies):
        """Return the first body that decodes to a payload with ``data.resultList``.

        Bodies that are not valid JSON, that do not decode to an object, or
        whose ``data`` member is not an object are skipped rather than raising.

        Args:
            response_bodies: Iterable of raw response body strings.

        Returns:
            The decoded dict of the first matching body, or ``None`` when no
            body matches.
        """
        for response_body in response_bodies:
            try:
                payload = json.loads(response_body)
            except (json.JSONDecodeError, TypeError):
                # Not JSON text at all (or not even a string): ignore it.
                continue

            if not isinstance(payload, dict):
                continue

            data = payload.get('data')
            # ``data`` may be null in the response; only a dict can hold the list.
            if isinstance(data, dict) and 'resultList' in data:
                return payload
        return None

    def process_data(self, data):
        """Turn a decoded search response into a price-sorted list of item dicts.

        Malformed entries are tolerated: an entry without a usable ``data``
        object is skipped, and unreadable labels or tags fall back to '无'
        instead of aborting the whole batch.

        Args:
            data: Decoded response; expected to hold ``data.resultList``.

        Returns:
            List of dicts with the keys ``description``, ``address``,
            ``seller``, ``price``, ``price_value``, ``images``, ``condition``,
            ``seller_credit`` and ``verification_method``, sorted by
            ``price_value`` ascending. Empty list when the response does not
            have the expected shape.
        """
        payload = data.get('data') if isinstance(data, dict) else None
        if not isinstance(payload, dict) or 'resultList' not in payload:
            print("响应格式不符合预期:", data)
            return []

        def has_tag(item_data, slot, wanted):
            # True when fishTags[slot].tagList holds a tag whose content equals ``wanted``.
            tag_list = ((item_data.get('fishTags') or {}).get(slot) or {}).get('tagList') or []
            return any(
                isinstance(tag, dict) and (tag.get('data') or {}).get('content') == wanted
                for tag in tag_list
            )

        def condition_of(item_data):
            # Value of the first '成色' label in pvLabels, or '无' when unavailable.
            raw_labels = item_data.get('pvLabels', '[]')
            try:
                labels = json.loads(raw_labels) if isinstance(raw_labels, str) else raw_labels
            except json.JSONDecodeError:
                return '无'
            if not isinstance(labels, list):
                return '无'
            for label in labels:
                if isinstance(label, dict) and label.get('propertyKeyName') == '成色':
                    values = label.get('propertyValuesList') or []
                    if values and isinstance(values[0], dict):
                        return values[0].get('value', '无')
                    return '无'
            return '无'

        processed_items = []
        for item in payload['resultList'] or []:
            item_data = item.get('data') if isinstance(item, dict) else None
            if not isinstance(item_data, dict):
                # Nothing to extract from this entry; keep processing the rest.
                continue

            sold_price = item_data.get('soldPrice', '')
            # A missing or non-numeric price sorts to the end of the list.
            try:
                price_value = float(sold_price) if sold_price else float('inf')
            except (TypeError, ValueError):
                price_value = float('inf')

            processed_items.append({
                "description": f"{item_data.get('title', '')} {item_data.get('description', '')}",
                "address": f"{item_data.get('province', '')} {item_data.get('city', '')} {item_data.get('area', '')}",
                "seller": item_data.get('userNick', ''),
                "price": sold_price,
                "price_value": price_value,
                "images": item_data.get('imageUrls', []),
                "condition": condition_of(item_data),
                "seller_credit": '卖家信用极好' if has_tag(item_data, 'r4', '卖家信用极好') else '无',
                "verification_method": '验货宝' if has_tag(item_data, 'r1', '验货宝') else '无',
            })

        # Cheapest first.
        return sorted(processed_items, key=lambda entry: entry['price_value'])

4、filter_items方法:可以根据上面得到的商品信息数据,针对成色、价格、卖家信用等重点字段设置判断条件,对商品信息进行精确过滤。

 def filter_items(self, items, min_price=300):
        """Keep the items whose price is at least *min_price*.

        Items whose ``price`` is missing, empty or not numeric are dropped
        instead of raising, because an unknown price cannot satisfy the filter.

        Args:
            items: Iterable of item dicts carrying a ``price`` entry.
            min_price: Lowest accepted price; defaults to 300.

        Returns:
            List of the accepted items, in their original order.
        """
        kept = []
        for item in items:
            try:
                price = float(item["price"])
            except (KeyError, TypeError, ValueError):
                continue
            if price >= min_price:
                kept.append(item)
        return kept

5、创建EmailNotifier类来对邮件中展示信息格式样式简单处理,当用户首次运行时通知邮件中的信息字体颜色均为黑色,后面每次循环发现的新商品后,邮件中新商品信息的字体颜色会变红,用来和原有的商品进行区分,方便查阅。

# Mail settings (placeholder values; replace them before running).
email_config = dict(
    user="xxxxxx@qq.com",
    password="xxxxxx",
    host="smtp.qq.com",
    to=["xxxxxxxx@qq.com", "xxxxxxxx@qq.com"],
)
# Subject line used for every notification mail.
EMAIL_SUBJECT = '商品更新通知'


class EmailNotifier:
    """Send HTML mails listing search results and remember what was reported."""

    def __init__(self):
        # Items included in earlier mails; grows with every successful send.
        self.previous_filtered_items = []

    def send_email(self, new_items):
        """Mail the known items (black) followed by *new_items*.

        On the first mail every item is printed in black; afterwards the
        previously reported items stay black and the new ones are red.
        Nothing is sent when *new_items* is empty.

        Args:
            new_items: Item dicts not reported before.
        """
        if not new_items:
            return

        contents = ["<h3>商品信息如下:</h3>"]
        if self.previous_filtered_items:
            contents.extend(self.format_item(item, color='black')
                            for item in self.previous_filtered_items)
            new_color = 'red'
        else:
            new_color = 'black'
        contents.extend(self.format_item(item, color=new_color) for item in new_items)

        yag = yagmail.SMTP(email_config['user'], email_config['password'], email_config['host'])
        try:
            yag.send(email_config['to'], EMAIL_SUBJECT, contents)
        finally:
            # Release the SMTP connection whether or not sending succeeded.
            yag.close()

        # Only reached after a successful send, so a failed mail is retried.
        self.previous_filtered_items.extend(new_items)

    def format_item(self, item, color):
        """Render one item as an HTML paragraph followed by a dashed separator.

        Item values come from scraped listings, so they are HTML-escaped
        before being placed into the markup.

        Args:
            item: Item dict with the keys used below.
            color: CSS colour for the paragraph text.

        Returns:
            The HTML fragment as a string.
        """
        from html import escape

        def text(key):
            return escape(str(item[key]))

        image_links = ', '.join(escape(str(url)) for url in item['images'])
        return (
                f"<p style='color:{color};'>描述: {text('description')}<br>价格: {text('price')}<br>"
                f"成色: {text('condition')}<br>商家信用: {text('seller_credit')}<br>"
                f"商品验证: {text('verification_method')}<br>商家名称: {text('seller')}<br>"
                f"商家地址: {text('address')}<br>图片链接: {image_links}</p>"
                + "<p>" + "-" * 70 + "</p>"  # separator made of 70 dashes
        )

6、运行时间设置:脚本以无限循环方式运行,每天在00:00~24:00(可自定义)的时间段内,每隔一段随机时间执行一次。间隔时间建议在10~30分钟之间随机取值(可自定义);下面示例代码中的默认间隔为1~2分钟,便于调试。


def run_test():
    """Run the ``Testxianyu1`` tests in-process and print the outcome.

    ``pytest.main`` reports failing tests through its return value rather
    than by raising, so the exit code is inspected before success is claimed.
    Exceptions raised by the call itself are printed, not propagated.
    """
    print("开始测试运行...")
    try:
        exit_code = pytest.main(["-q", "test_xianyu8.py::Testxianyu1"])
    except Exception as e:
        print(f"测试运行时遇到错误: {e}")
        return

    if exit_code == 0:
        print("测试运行成功")
    else:
        print(f"测试运行失败,退出码: {int(exit_code)}")


def schedule_next_run(min_minutes=1, max_minutes=2):
    """Register ``run_and_reschedule`` with ``schedule`` after a random delay.

    Each call registers one additional job with the default scheduler.

    Args:
        min_minutes: Smallest delay in minutes (inclusive); defaults to 1.
        max_minutes: Largest delay in minutes (inclusive); defaults to 2.

    Returns:
        The delay that was chosen, in minutes.
    """
    next_run_in_minutes = random.randint(min_minutes, max_minutes)
    print(f"下次运行将在 {next_run_in_minutes} 分钟后")
    schedule.every(next_run_in_minutes).minutes.do(run_and_reschedule)
    return next_run_in_minutes


def run_and_reschedule():
    """Run the tests once, then register the next run.

    Returns:
        ``schedule.CancelJob``, so that when this function is executed as a
        scheduled job the scheduler removes that job afterwards. Without it
        the job would keep recurring and register one more job on every run,
        making the number of pending jobs grow without bound. The value is
        harmless when the function is called directly.
    """
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"准备在 {current_time} 运行测试...")
    run_test()
    next_run_in_minutes = schedule_next_run()
    next_run_time = datetime.now() + timedelta(minutes=next_run_in_minutes)
    print(f"测试运行完成,下次运行时间: {next_run_time.strftime('%Y-%m-%d %H:%M:%S')}")
    return schedule.CancelJob

# Hours of the day during which scheduled runs may start: start inclusive,
# end exclusive. 0 and 24 cover the whole day.
RUN_START_HOUR = 0
RUN_END_HOUR = 24


def main():
    """Run the tests once, then serve the scheduler forever.

    Pending jobs are executed only while the current hour lies inside
    ``RUN_START_HOUR <= hour < RUN_END_HOUR``; outside of it a notice is
    printed on every one-second tick.
    """
    print("首次运行...")
    run_and_reschedule()

    while True:
        current_hour = datetime.now().hour
        if RUN_START_HOUR <= current_hour < RUN_END_HOUR:
            schedule.run_pending()
        else:
            print(f"当前时间 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} 不在运行时间范围内。")
        time.sleep(1)


# Guarded so that importing this file (as pytest does when it collects the
# tests in it) does not start another endless loop.
if __name__ == "__main__":
    main()

7、使用效果

8、完整代码如下,感谢阅读。

import schedule
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pytest
import yagmail
import json
import random
from datetime import datetime, timedelta
import time

# Mail settings (placeholder values; replace them before running).
email_config = dict(
    user="xxxxxxx@qq.com",  # sender mailbox
    password="xxxxxxxx",  # mailbox authorization code
    host="smtp.qq.com",
    to=["xxxxxx@qq.com"],  # recipient mailboxes
)
# Subject line used for every notification mail.
EMAIL_SUBJECT = '商品更新通知'


class EmailNotifier:
    """Send HTML mails listing search results and remember what was reported."""

    def __init__(self):
        # Items included in earlier mails; grows with every successful send.
        self.previous_filtered_items = []

    def send_email(self, new_items):
        """Mail the known items (black) followed by *new_items*.

        On the first mail every item is printed in black; afterwards the
        previously reported items stay black and the new ones are red.
        Nothing is sent when *new_items* is empty.

        Args:
            new_items: Item dicts not reported before.
        """
        if not new_items:
            return

        contents = ["<h3>商品信息如下:</h3>"]
        if self.previous_filtered_items:
            contents.extend(self.format_item(item, color='black')
                            for item in self.previous_filtered_items)
            new_color = 'red'
        else:
            new_color = 'black'
        contents.extend(self.format_item(item, color=new_color) for item in new_items)

        yag = yagmail.SMTP(email_config['user'], email_config['password'], email_config['host'])
        try:
            yag.send(email_config['to'], EMAIL_SUBJECT, contents)
        finally:
            # Release the SMTP connection whether or not sending succeeded.
            yag.close()

        # Only reached after a successful send, so a failed mail is retried.
        self.previous_filtered_items.extend(new_items)

    def format_item(self, item, color):
        """Render one item as an HTML paragraph followed by a dashed separator.

        Item values come from scraped listings, so they are HTML-escaped
        before being placed into the markup.

        Args:
            item: Item dict with the keys used below.
            color: CSS colour for the paragraph text.

        Returns:
            The HTML fragment as a string.
        """
        from html import escape

        def text(key):
            return escape(str(item[key]))

        image_links = ', '.join(escape(str(url)) for url in item['images'])
        return (
                f"<p style='color:{color};'>描述: {text('description')}<br>价格: {text('price')}<br>"
                f"成色: {text('condition')}<br>商家信用: {text('seller_credit')}<br>"
                f"商品验证: {text('verification_method')}<br>商家名称: {text('seller')}<br>"
                f"商家地址: {text('address')}<br>图片链接: {image_links}</p>"
                + "<p>" + "-" * 80 + "</p>"  # separator made of 80 dashes
        )


@pytest.fixture(scope="class")
def driver():
    """Yield a headless Chrome WebDriver already opened on the Goofish home page.

    Class-scoped pytest fixture: everything before ``yield`` is setup,
    everything after it is teardown.

    Yields:
        The ``webdriver.Chrome`` instance. Performance logging is enabled so
        that network events can later be read via ``get_log('performance')``.
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    chrome_options = Options()
    # NOTE(review): presumably meant to stop Chrome from loading images;
    # confirm that this preference key is still honoured by current Chrome.
    chrome_options.add_experimental_option("prefs", {"profile.default_content_settings": {"images": 2}})
    # add_experimental_option() keeps a single value per option name, so both
    # switches have to be passed in one call; with two separate calls only the
    # list from the last call would remain in effect.
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    chrome_options.add_experimental_option("useAutomationExtension", False)

    for argument in (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--window-size=1920,1080",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    ):
        chrome_options.add_argument(argument)

    # Record DevTools "performance" events, which include the network traffic.
    chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

    browser = webdriver.Chrome(options=chrome_options)
    try:
        browser.get('https://www.goofish.com/')
        yield browser
    finally:
        # Runs even when the initial page load fails, so no browser is leaked.
        browser.quit()


@pytest.mark.usefixtures("driver")
class Testxianyu1:
    """Search Goofish for a product and e-mail the listings not reported before.

    The class deliberately has no ``__init__``: pytest does not collect test
    classes that define one, and it creates a fresh instance for every test,
    so per-instance state would not survive from one run to the next.
    """

    # Shared notifier, created on first use, so the list of already reported
    # items lives as long as the class object itself.
    _email_notifier = None

    @property
    def email_notifier(self):
        """Return the class-wide EmailNotifier, creating it on first access."""
        owner = type(self)
        if owner._email_notifier is None:
            owner._email_notifier = EmailNotifier()
        return owner._email_notifier

    @pytest.mark.parametrize("name", [("努比亚阿尔法手表")])  # product name to search for
    def test_xianyushangping(self, driver, name):
        """Search for *name*, read the search API response and mail new items.

        Args:
            driver: WebDriver supplied by the ``driver`` fixture.
            name: Product name typed into the search box.
        """
        print(f"开始搜索 {name}...")

        search_input = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, ".search-input--WY2l9QD3"))
        )
        search_input.click()
        search_input.send_keys(name + Keys.ENTER)

        checkbox = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR,
                                              ".search-checkbox-item-container--DsTIZUle:nth-child(2) > .search-checkbox--fULWOSyM"))
        )
        checkbox.click()

        response_bodies = self.capture_all_network_responses(
            driver, target_url_partial="mtop.taobao.idlemtopsearch.pc.search")
        if not response_bodies:
            return

        relevant_response = self.extract_relevant_response(response_bodies)
        if not relevant_response:
            return

        filtered_items = self.filter_items(self.process_data(relevant_response))

        # Decide by content rather than by count: a listing that replaces
        # another one keeps the total unchanged but is still new.
        already_reported = self.email_notifier.previous_filtered_items
        new_items = [item for item in filtered_items if item not in already_reported]
        if new_items:
            self.email_notifier.send_email(new_items)
            print("发送邮件成功。")

    def capture_all_network_responses(self, driver, target_url_partial):
        """Collect the bodies of JSON responses whose URL contains a given fragment.

        Args:
            driver: Object offering ``get_log('performance')`` and
                ``execute_cdp_cmd``.
            target_url_partial: Substring that the response URL must contain.

        Returns:
            List of non-empty response body strings, in log order.
        """
        logs = driver.get_log('performance')
        response_bodies = []
        print("开始捕获网络响应...")  # debug output

        for log in logs:
            log_dict = json.loads(log['message'])['message']
            if log_dict['method'] != 'Network.responseReceived':
                continue

            response_details = log_dict.get('params', {}).get('response', {})
            url = response_details.get('url', '')
            mime_type = response_details.get('mimeType', '')
            if target_url_partial not in url or 'application/json' not in mime_type:
                continue

            request_id = log_dict['params']['requestId']
            try:
                response_body = driver.execute_cdp_cmd(
                    'Network.getResponseBody', {'requestId': request_id}
                ).get('body', '')
            except Exception as e:
                # This particular message is deliberately not printed to keep
                # the output short; other failures are reported.
                if 'No resource with given identifier found' not in str(e):
                    print(f"异常捕获: {str(e)}")
                continue

            if response_body:
                response_bodies.append(response_body)

        print(f"捕获到 {len(response_bodies)} 个响应")  # debug output
        return response_bodies

    def extract_relevant_response(self, response_bodies):
        """Return the first body that decodes to a payload with ``data.resultList``.

        Bodies that are not valid JSON, that do not decode to an object, or
        whose ``data`` member is not an object are skipped rather than raising.

        Args:
            response_bodies: Iterable of raw response body strings.

        Returns:
            The decoded dict of the first matching body, or ``None``.
        """
        for response_body in response_bodies:
            try:
                payload = json.loads(response_body)
            except (json.JSONDecodeError, TypeError):
                continue

            if not isinstance(payload, dict):
                continue

            data = payload.get('data')
            # ``data`` may be null in the response; only a dict can hold the list.
            if isinstance(data, dict) and 'resultList' in data:
                return payload
        return None

    def process_data(self, data):
        """Turn a decoded search response into a price-sorted list of item dicts.

        Malformed entries are tolerated: an entry without a usable ``data``
        object is skipped, and unreadable labels or tags fall back to '无'
        instead of aborting the whole batch.

        Args:
            data: Decoded response; expected to hold ``data.resultList``.

        Returns:
            List of dicts with the keys ``description``, ``address``,
            ``seller``, ``price``, ``price_value``, ``images``, ``condition``,
            ``seller_credit`` and ``verification_method``, sorted by
            ``price_value`` ascending. Empty list when the response does not
            have the expected shape.
        """
        payload = data.get('data') if isinstance(data, dict) else None
        if not isinstance(payload, dict) or 'resultList' not in payload:
            print("响应格式不符合预期:", data)
            return []

        def has_tag(item_data, slot, wanted):
            # True when fishTags[slot].tagList holds a tag whose content equals ``wanted``.
            tag_list = ((item_data.get('fishTags') or {}).get(slot) or {}).get('tagList') or []
            return any(
                isinstance(tag, dict) and (tag.get('data') or {}).get('content') == wanted
                for tag in tag_list
            )

        def condition_of(item_data):
            # Value of the first '成色' label in pvLabels, or '无' when unavailable.
            raw_labels = item_data.get('pvLabels', '[]')
            try:
                labels = json.loads(raw_labels) if isinstance(raw_labels, str) else raw_labels
            except json.JSONDecodeError:
                return '无'
            if not isinstance(labels, list):
                return '无'
            for label in labels:
                if isinstance(label, dict) and label.get('propertyKeyName') == '成色':
                    values = label.get('propertyValuesList') or []
                    if values and isinstance(values[0], dict):
                        return values[0].get('value', '无')
                    return '无'
            return '无'

        processed_items = []
        for item in payload['resultList'] or []:
            item_data = item.get('data') if isinstance(item, dict) else None
            if not isinstance(item_data, dict):
                # Nothing to extract from this entry; keep processing the rest.
                continue

            sold_price = item_data.get('soldPrice', '')
            # A missing or non-numeric price sorts to the end of the list.
            try:
                price_value = float(sold_price) if sold_price else float('inf')
            except (TypeError, ValueError):
                price_value = float('inf')

            processed_items.append({
                "description": f"{item_data.get('title', '')} {item_data.get('description', '')}",
                "address": f"{item_data.get('province', '')} {item_data.get('city', '')} {item_data.get('area', '')}",
                "seller": item_data.get('userNick', ''),
                "price": sold_price,
                "price_value": price_value,
                "images": item_data.get('imageUrls', []),
                "condition": condition_of(item_data),
                "seller_credit": '卖家信用极好' if has_tag(item_data, 'r4', '卖家信用极好') else '无',
                "verification_method": '验货宝' if has_tag(item_data, 'r1', '验货宝') else '无',
            })

        # Cheapest first.
        return sorted(processed_items, key=lambda entry: entry['price_value'])

    def filter_items(self, items, min_price=300):
        """Keep the items whose price is at least *min_price*.

        Items whose ``price`` is missing, empty or not numeric are dropped
        instead of raising, because an unknown price cannot satisfy the filter.

        Args:
            items: Iterable of item dicts carrying a ``price`` entry.
            min_price: Lowest accepted price; defaults to 300.

        Returns:
            List of the accepted items, in their original order.
        """
        kept = []
        for item in items:
            try:
                price = float(item["price"])
            except (KeyError, TypeError, ValueError):
                continue
            if price >= min_price:
                kept.append(item)
        return kept


def run_test():
    """Run the ``Testxianyu1`` tests in-process and print the outcome.

    ``pytest.main`` reports failing tests through its return value rather
    than by raising, so the exit code is inspected before success is claimed.
    Exceptions raised by the call itself are printed, not propagated.
    """
    print("开始测试运行...")
    try:
        exit_code = pytest.main(["-q", "test_xianyu8.py::Testxianyu1"])
    except Exception as e:
        print(f"测试运行时遇到错误: {e}")
        return

    if exit_code == 0:
        print("测试运行成功")
    else:
        print(f"测试运行失败,退出码: {int(exit_code)}")


def schedule_next_run(min_minutes=1, max_minutes=2):
    """Register ``run_and_reschedule`` with ``schedule`` after a random delay.

    Each call registers one additional job with the default scheduler.

    Args:
        min_minutes: Smallest delay in minutes (inclusive); defaults to 1.
        max_minutes: Largest delay in minutes (inclusive); defaults to 2.

    Returns:
        The delay that was chosen, in minutes.
    """
    next_run_in_minutes = random.randint(min_minutes, max_minutes)
    print(f"下次运行将在 {next_run_in_minutes} 分钟后")
    schedule.every(next_run_in_minutes).minutes.do(run_and_reschedule)
    return next_run_in_minutes


def run_and_reschedule():
    """Run the tests once, then register the next run.

    Returns:
        ``schedule.CancelJob``, so that when this function is executed as a
        scheduled job the scheduler removes that job afterwards. Without it
        the job would keep recurring and register one more job on every run,
        making the number of pending jobs grow without bound. The value is
        harmless when the function is called directly.
    """
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"准备在 {current_time} 运行测试...")
    run_test()
    next_run_in_minutes = schedule_next_run()
    next_run_time = datetime.now() + timedelta(minutes=next_run_in_minutes)
    print(f"测试运行完成,下次运行时间: {next_run_time.strftime('%Y-%m-%d %H:%M:%S')}")
    return schedule.CancelJob

# Hours of the day during which scheduled runs may start: start inclusive,
# end exclusive. 0 and 24 cover the whole day.
RUN_START_HOUR = 0
RUN_END_HOUR = 24


def main():
    """Run the tests once, then serve the scheduler forever.

    Pending jobs are executed only while the current hour lies inside
    ``RUN_START_HOUR <= hour < RUN_END_HOUR``; outside of it a notice is
    printed on every one-second tick.
    """
    print("首次运行...")
    run_and_reschedule()

    while True:
        current_hour = datetime.now().hour
        if RUN_START_HOUR <= current_hour < RUN_END_HOUR:
            schedule.run_pending()
        else:
            print(f"当前时间 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} 不在运行时间范围内。")
        time.sleep(1)


# Guarded so that importing this file (as pytest does when it collects the
# tests in it) does not start another endless loop.
if __name__ == "__main__":
    main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值