Fetching Product Information from Shopee

This function fetches product information from the Shopee website. It takes a header argument (typically containing a User-Agent and cookie) and an optional URL argument (defaulting to "https://shopee.co.id/"). When a valid product URL is supplied, it scrapes the product's title, shipping fee, variant attributes, images, and other details; if no valid URL is given, or the cookie does not represent a logged-in session, it returns an error dictionary instead.
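The header argument is a plain dict of HTTP request headers. A minimal sketch of what it might contain (the User-Agent string and cookie value below are placeholders, not real credentials):

header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    # Cookie copied from a browser session that is logged in to Shopee
    "cookie": "SPC_EC=...; SPC_U=...",
}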

import requests
from bs4 import BeautifulSoup
import re
import json
from urllib.parse import urlparse

def get_goods_info(header, Url="https://shopee.co.id/", *args, **kwargs):
    """
    Fetch Shopee product information.
    """
    # A bare default URL means no product URL was supplied
    if Url == "https://shopee.co.id/":
        return {"code": "0001", "error": "not url"}

    response = requests.get(Url, headers=header)
    soup = BeautifulSoup(response.text, 'html.parser')
    # A missing X-Request-ID response header is treated as a sign the session is not logged in
    if not response.headers.get("X-Request-ID"):
        return {"code": "0002", "error": "not login"}

    # Core page blocks; the class names are Shopee's obfuscated CSS classes and may change
    data_box = {
        "title": soup.find("div", attrs={"class": "WBVL_7"}),
        "freight": soup.find("div", attrs={"class": "flex items-center PZGOkt"}),
        "attrs": soup.find("div", attrs={"class": "flex KIoPj6 W5LiQM"}),
    }
    # Bail out if any of the expected blocks is missing
    if not all(data_box.values()):
        return {}

    # Collect variant attributes (e.g. colour, size): each <section> holds one
    # attribute group whose options are rendered as <button> elements
    att = {}
    for i in data_box["attrs"].findAll("section"):
        option_box = i.find("div", attrs={"class": "flex items-center j7HL5Q"})
        if not option_box:
            continue
        key = i.find("h3").text
        value = []
        for x in option_box.findAll("button"):
            value.append({"src": x.find("img").get("src") if x.find("img") else None, "attr_name": x.text})
        att[key] = value

    # Product gallery: guard against the container being missing before reading its thumbnails
    img_container = soup.find("div", attrs={"class": "airUhU"})
    if not img_container:
        return {}
    img_block = img_container.findAll("div", attrs={"class": "UBG7wZ"})
    if not img_block:
        return {}
    images = [img.find("img").attrs.get("src") for img in img_block]

    # Extract shop_id and item_id from the URL path ("-i.<shop_id>.<item_id>" or "/<shop_id>/<item_id>")
    result = re.search(r'-i.\d+\.\d+|/\d+/\d+', urlparse(Url).path)
    if not result:
        return {}
    id_match = re.search(r'\d+\.\d+|\d+/\d+', result.group())
    if not id_match:
        return {}
    at = id_match.group()

    at = at.split(".") if "." in at else at.split("/")
    # Query Shopee's product-detail API with the extracted ids
    get_info_response = requests.get("https://shopee.co.id/api/v4/pdp/get_pc", headers=header, params={
        "shop_id": at[0], "item_id": at[1], "detail_level": 0
    })
    p_data = json.loads(get_info_response.text)
    # A missing "data" key means the API rejected the request or the item does not exist
    if not p_data or not p_data.get("data"):
        return {}
    product = p_data["data"]
    p_datad = (product.get("product_attributes") or {}).get("attrs") or []
    p_datad = [{"name": i.get("name"), "value": i.get("value")} for i in p_datad]
    item = product.get("item") or {}
    models = item.get("models")
    description = item.get("description")

    return {
        "url":Url,
        "title": data_box["title"].text,
        "freight": data_box["freight"].text,
        "attrs_info": att,
        "images": images,
        "info": p_datad,
        "models": models,
        "description": description,
    }
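
A minimal usage sketch. The product URL below is a made-up example of the "-i.<shop_id>.<item_id>" pattern the parser expects, and the header is the same placeholder dict sketched earlier:

if __name__ == "__main__":
    header = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "cookie": "SPC_EC=...; SPC_U=...",  # placeholder logged-in cookie
    }
    # Hypothetical product URL; the trailing numbers encode shop_id and item_id
    url = "https://shopee.co.id/sample-product-i.12345678.87654321"
    info = get_goods_info(header, Url=url)
    if not info:
        print("page structure not recognised or request rejected")
    elif "error" in info:
        print(info["code"], info["error"])  # "0001" = missing URL, "0002" = not logged in
    else:
        print(info["title"], "-", len(info["images"]), "image(s)")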