虾皮获取商品信息

無敗の草

已于 2024-09-14 15:13:58 修改

阅读量203

点赞数 1

文章标签：前端 javascript 数据库

于 2024-09-14 15:10:06 首次发布

本文链接：https://blog.csdn.net/weixin_52717285/article/details/142259118

版权

虾皮获取商品信息

本文给出的 get_goods_info 函数用于从 Shopee 网站获取商品信息。它接受一个 header 参数（通常包含用户代理和 cookie 等信息）和一个可选的 Url 参数（默认为 "https://shopee.co.id/"）。如果提供了有效的商品 URL，它会尝试获取该商品的信息，包括标题、运费、属性、图片、规格和描述等。如果未提供 URL（即仍为默认值），或响应表明未登录，它会返回带错误码的信息；如果页面或接口数据解析失败，则返回空字典。

import requests
from bs4 import BeautifulSoup
import re
import json
from urllib.parse import urlparse
import uuid

def get_goods_info(header, Url="https://shopee.co.id/", *args, **kwargs):
    """Fetch product information for a Shopee (shopee.co.id) product page.

    The product page HTML is scraped for the title, shipping fee, variation
    options and gallery images. The attribute list, models and description
    come from the ``/api/v4/pdp/get_pc`` JSON endpoint, which is queried with
    the shop id and item id parsed from the URL path.

    :param header: HTTP headers sent with both requests (typically the
        User-Agent and the cookies of a logged-in session).
    :param Url: Product page URL. Its path must contain the ids either as
        ``-i.<shop_id>.<item_id>`` or as ``/<shop_id>/<item_id>``. The
        default value (the bare site root) means "no URL supplied".
    :param args: Ignored; accepted for backward compatibility.
    :param kwargs: Only ``timeout`` is read (seconds, or any value accepted
        by ``requests``; default 15). Everything else is ignored.
    :return: One of:

        * ``{"code": "0001", "error": "not url"}`` when no URL was supplied;
        * ``{"code": "0002", "error": "not login"}`` when the page response
          carries no ``X-Request-ID`` header;
        * ``{}`` when the page or the API payload lacks the expected data;
        * otherwise a dict with the keys ``url``, ``title``, ``freight``,
          ``attrs_info``, ``images``, ``info``, ``models``, ``description``.
    """
    # The bare site root is the "no URL supplied" sentinel.
    if Url == "https://shopee.co.id/":
        return {"code": "0001", "error": "not url"}

    # Without a timeout a stalled connection would block the caller forever.
    timeout = kwargs.get("timeout", 15)

    response = requests.get(Url, headers=header, timeout=timeout)
    # A missing X-Request-ID header is treated as "session not logged in".
    if not response.headers.get("X-Request-ID"):
        return {"code": "0002", "error": "not login"}

    soup = BeautifulSoup(response.text, 'html.parser')

    # NOTE(review): these CSS class names look build-generated and will
    # presumably change when the site is redeployed - confirm before relying
    # on them.
    data_box = {
        "title": soup.find("div", attrs={"class": "WBVL_7"}),
        "freight": soup.find("div", attrs={"class": "flex items-center PZGOkt"}),
        "attrs": soup.find("div", attrs={"class": "flex KIoPj6 W5LiQM"}),
    }
    if any(box is None for box in data_box.values()):
        return {}

    # Variation options: one <section> per variation, one <button> per option.
    att = {}
    for section in data_box["attrs"].find_all("section"):
        options = section.find("div", attrs={"class": "flex items-center j7HL5Q"})
        heading = section.find("h3")
        if options is None or heading is None:
            continue
        values = []
        for button in options.find_all("button"):
            thumb = button.find("img")
            values.append({
                "src": thumb.get("src") if thumb is not None else None,
                "attr_name": button.text,
            })
        att[heading.text] = values

    # Gallery images; a missing container is handled like an empty gallery.
    gallery = soup.find("div", attrs={"class": "airUhU"})
    img_block = []
    if gallery is not None:
        img_block = gallery.find_all("div", attrs={"class": "UBG7wZ"})
    if not img_block:
        return {}
    images = []
    for block in img_block:
        img = block.find("img")
        # Skip gallery cells that hold no <img> instead of crashing on them.
        if img is not None:
            images.append(img.get("src"))

    # Extract the shop id and item id from the URL path. The dot after "-i"
    # is escaped so that a title fragment such as "-i12.5" is not mistaken
    # for the id suffix.
    id_match = re.search(r'-i\.(\d+)\.(\d+)|/(\d+)/(\d+)', urlparse(Url).path)
    if not id_match:
        return {}
    shop_id = id_match.group(1) or id_match.group(3)
    item_id = id_match.group(2) or id_match.group(4)

    get_info_response = requests.get(
        "https://shopee.co.id/api/v4/pdp/get_pc",
        headers=header,
        params={"shop_id": shop_id, "item_id": item_id, "detail_level": 0},
        timeout=timeout,
    )
    # Parse the body directly as JSON: routing it through an HTML parser can
    # corrupt payloads that contain "<", ">" or entities.
    try:
        p_data = json.loads(get_info_response.text)
    except ValueError:
        return {}

    data = p_data.get("data") if isinstance(p_data, dict) else None
    if not isinstance(data, dict) or not data:
        return {}

    item = data.get("item") or {}
    attributes = (data.get("product_attributes") or {}).get("attrs") or []
    p_datad = [{"name": attr.get("name"), "value": attr.get("value")} for attr in attributes]

    return {
        "url": Url,
        "title": data_box["title"].text,
        "freight": data_box["freight"].text,
        "attrs_info": att,
        "images": images,
        "info": p_datad,
        "models": item.get("models"),
        "description": item.get("description"),
    }