Python抓取千牛后台订单数据（3个月前的订单毫无压力）

胡子哥502

已于 2025-01-11 16:04:42 修改

阅读量1.1k

点赞数 3

文章标签： python 开发语言

于 2024-08-17 14:13:18 首次发布

本文链接：https://blog.csdn.net/huweijun_2012/article/details/141279931

版权

废话不多说，直接上代码。

# Qianniu (Taobao seller back office) order-data scraper.
# Dependency: pip install DrissionPage
# Library docs: http://www.drissionpage.cn/

from DrissionPage import ChromiumPage, WebPage, SessionPage, SessionOptions, ChromiumOptions
import time
# NOTE(review): "commom" looks like a misspelling of "common" — confirm the
# real package name before changing it.
from commom.FileUtil import FileUtil

# Output file for the captured responses. An empty string is written first;
# presumably this truncates data left over from a previous run (the FileUtil
# listing in this post opens the file in 'w' mode).
file_path = "D:\\qianniu_order_data.txt"
FileUtil.write_content(file_path, "")

# Login page URL and placeholder credentials (replace before running).
login_url = 'https://loginmyseller.taobao.com/'
login_account = '账号。。。'
login_pwd = '密码。。。。'

# Open the login page in a DrissionPage WebPage.
page = WebPage()
page.get(login_url)

print("登录页面打开")
# Locate the login form controls. The "#..." / "...." prefixes are
# DrissionPage locator shorthands (presumably id / class attribute — see the
# DrissionPage locator documentation).
account_input = page.ele("#fm-login-id")
pwd_input = page.ele("#fm-login-password")
login_btn = page.ele(".fm-button fm-submit password-login")

# Fill in the credentials, submit, then wait for the next page load to start.
account_input.input(login_account)
pwd_input.input(login_pwd)
login_btn.click()
page.wait.load_start()

# NOTE(review): this message is printed unconditionally; nothing above checks
# that the login actually succeeded (e.g. no captcha / error detection).
print("登录成功")

# Navigate to the "sold orders" page of the seller console.
page.get("https://myseller.taobao.com/home.htm/trade-platform/tp/sold")
page.wait.load_start()
# Start listening for the async order-list request BEFORE clicking the filter,
# so the request triggered by the click below can be captured.
page.listen.start("https://trade.taobao.com/trade/itemlist/asyncSold.htm?event_submit_do_query=1&_input_charset=utf8")
# Click the element whose text is "3个月前" ("3 months ago") to switch the
# list to orders older than three months.
sgyq_ele = page.ele("@text()=3个月前")
sgyq_ele.click()
# page.wait.load_start()
res = page.listen.wait()  # wait for and fetch one captured data packet
# Only persist the packet when the HTTP status is 200.
if res.response.status == 200:
    print("返回的数据：", res.response.url)
    # The response body is stringified with str() and stored one packet per line.
    FileUtil.append_content(file_path, str(res.response.body) + "\n")

print("第一页数据获取完成。。。")

# Page through the remaining results: click "next page" until that button's
# parent element reports a "disabled" attribute.
while True:
    # Element whose text is "下一页" ("next page").
    next_btn_helper = page.ele("@text()=下一页")
    print("下一页按钮text：", next_btn_helper.text)
    # The disabled flag is read from the PARENT of the text element.
    disabled = next_btn_helper.parent().attr("disabled")
    print("disabled=", disabled)
    # Currently highlighted pagination item; its <span> text is printed as the
    # current page number.
    current_page_ele = page.ele(".next-btn next-medium next-btn-normal next-pagination-item next-current")
    print("当前页码 = ", current_page_ele.ele("tag:span").text)
    # Last page reached: stop paging.
    if disabled:
        break

    next_btn_helper.click()
    # page.wait.load_start()
    res = page.listen.wait()  # wait for and fetch one captured data packet
    # Only persist the packet when the HTTP status is 200.
    if res.response.status == 200:
        print("返回的数据：", res.response.url)
        FileUtil.append_content(file_path, str(res.response.body) + "\n")
    # Pause 2 seconds between pages (presumably to throttle requests).
    time.sleep(2)

# Disabled snippet kept for reference: drags the element "#nc_1_n1z"
# (presumably a slider-captcha handle — TODO confirm).
# page.ele("#nc_1_n1z")
# # press and hold the left mouse button on the element
# page.actions.hold('#nc_1_n1z')
# # move the mouse 300 pixels to the right
# # NOTE(review): the call below actually passes 260, not 300
# page.actions.right(260)
# # release the left mouse button
# page.actions.release()


# Sleep for 100 seconds before the script ends (presumably to keep the
# browser session open for inspection — TODO confirm intent).
time.sleep(100)

FileUtil 工具类（即上文导入的 commom/FileUtil.py）代码如下：

class FileUtil:
    """Static helpers for simple UTF-8 text-file I/O.

    Every helper is best-effort: failures are reported with ``print`` and are
    not raised to the caller.
    """

    @staticmethod
    def write_content(file_path: str, content: str) -> None:
        """
        Write content to a file, overwriting it if it exists and creating it
        otherwise.

        :param file_path: path of the file to write
        :param content: text to write
        """
        try:
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(content)
            print(f"内容已成功写入到 {file_path}")
        except IOError as e:
            print(f"写入文件时发生错误: {e}")

    @staticmethod
    def append_content(file_path: str, content: str) -> None:
        """
        Append content to the end of a file, creating the file if it does not
        exist.

        :param file_path: path of the file to append to
        :param content: text to append
        """
        try:
            with open(file_path, 'a', encoding='utf-8') as file:
                file.write(content)
            print(f"内容已成功追加到 {file_path}")
        except IOError as e:
            print(f"追加内容到文件时发生错误: {e}")

    @staticmethod
    def delete_file(file_path: str) -> None:
        """
        Delete a file.

        A missing file is reported, not raised.

        :param file_path: path of the file to delete
        """
        # Local import keeps this class self-contained; it lives outside the
        # ``try`` so the guarded region is only the call that can fail.
        import os

        try:
            os.remove(file_path)
            print(f"文件 {file_path} 已成功删除")
        except FileNotFoundError:
            # Must precede the IOError handler: FileNotFoundError is a subclass.
            print(f"文件 {file_path} 不存在")
        except IOError as e:
            print(f"删除文件时发生错误: {e}")

    @staticmethod
    def read_lines(file_path: str) -> list:
        """
        Read a text file and return its lines as a list of strings.

        Each line is passed through ``str.strip()``, so leading and trailing
        whitespace (including the line terminator) is removed.
        Returns an empty list if the file does not exist or if any error
        occurs while reading, including content that is not valid UTF-8.

        :param file_path: path of the text file to read
        :return: list with one stripped string per line
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                # Iterating the file decodes lazily, so decode errors surface
                # inside this ``try`` block.
                return [line.strip() for line in file]
        except FileNotFoundError:
            print(f"文件 {file_path} 不存在")
            return []
        except (IOError, UnicodeDecodeError) as e:
            # UnicodeDecodeError is a ValueError, not an IOError; without
            # catching it, a non-UTF-8 file would raise instead of returning
            # the documented empty list.
            print(f"读取文件 {file_path} 时发生错误: {e}")
            return []