爬取小米有品app商品数据

本文旨在交流学习,勿作他用,否则后果自负
环境 linux+pycharm+anaconda

import csv
import requests
from lxml import etree
import re
import random
import json
from usere_agent import UA
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


# Endpoint that the requests in this script POST to.
url = 'https://youpin.mi.com/app/shopv3/pipe'

# Headers for the category-listing request.
# Content-Length is intentionally not set here: requests computes it from the
# encoded form body, so a hard-coded value is at best redundant.
# NOTE(review): advertising 'br' presumably requires a brotli decoder to be
# installed for requests to decode such a response -- confirm in your env.
headers1 = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Host': 'youpin.mi.com',
    'Origin': 'https://youpin.mi.com',
    'Referer': 'https://youpin.mi.com/',
    'User-Agent': UA,
}

# Cookie string for youpin.mi.com, e.g. copied from a logged-in browser
# session. Left empty by default; paste your own value here.
# NOTE(review): whether the endpoint accepts requests without a cookie is not
# visible from this script -- confirm before relying on the empty default.
COOKIE = ''

# Headers installed on the session used for the per-category requests.
# Content-Length is not set: requests derives it from the encoded body.
headers2 = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://youpin.mi.com',
    'Referer': 'https://youpin.mi.com/',
    'User-Agent': UA,
}
# Only send a Cookie header when one has actually been configured.
if COOKIE:
    headers2['Cookie'] = COOKIE


def detail_headers(gid):
    """Return request headers whose Referer is the detail page of *gid*.

    Args:
        gid: Product id; it is formatted into the Referer URL as-is.

    Returns:
        A new dict of HTTP headers (a fresh dict on every call).
    """
    return {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # Content-Length is omitted: requests derives it from the body.
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'youpin.mi.com',
        'Origin': 'https://youpin.mi.com',
        'Referer': 'https://youpin.mi.com/detail?gid={}'.format(gid),
        'User-Agent': UA,
    }


# Module-level name kept so references to ``headers3`` still resolve. No
# product id exists at this point, so the Referer carries an empty gid; call
# detail_headers(gid) for a real product.
# NOTE(review): presumably intended for product-detail requests -- nothing in
# the visible script uses it.
headers3 = detail_headers('')


# Form payload for the "GetGroup2ClassInfo" action of the "Homepage" model;
# the JSON document travels as the value of the single 'mkbl_data' field.
data1 = {
    'mkbl_data': '{"result": {"model": "Homepage", "action": "GetGroup2ClassInfo", "parameters": {}}}',
}
# NOTE(review): verify=False disables TLS certificate checking; kept because
# the script explicitly silences the matching warning, but confirm it is needed.
# The timeout stops a stalled connection from hanging the script forever.
category_response = requests.post(
    url=url, headers=headers1, data=data1, verify=False, timeout=30)
# Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
category_response.raise_for_status()
req = category_response.json()
groups = req['result']['result']['mkbl_data']['groups']

# Parallel lists: c_name[n] is the display name and c_id[n] the 'ucid' of the
# n-th top-level category. ``groups`` is walked as a sequence of sequences
# whose elements each carry a 'class' dict.
c_name = []
c_id = []
for group in groups:
    for entry in group:
        c_name.append(entry['class']['name'])
        c_id.append(entry['class']['ucid'])

# Directory (trailing slash included) that receives one CSV file per category.
OUTPUT_DIR = '/media/liu/_dde_data/project/spider/供应商/xmyp/'


def _parse_product(product):
    """Extract the exported fields from one product dict.

    Args:
        product: One element of a section's 'mkbl_data' list.

    Returns:
        Tuple ``(name, summary, pic_url, price, url)``.

    Raises:
        KeyError, TypeError, ValueError: if a field is missing or malformed.
    """
    # The id is not exported, but an entry without one counts as malformed.
    product['gid']
    name = product['name']          # product name
    summary = product['summary']    # short description
    pic_url = product['pic_url']    # product image
    # 'price_min' is divided by 100 -- presumably fen -> yuan; confirm.
    price = int(product['price_min']) / 100
    item_url = product['url']       # product link
    return name, summary, pic_url, price, item_url


# One session for the whole run so connections are reused and closed at the end.
with requests.Session() as s:
    s.headers.update(headers2)

    for category_name, category_id in zip(c_name, c_id):
        # json.dumps keeps the payload well-formed whatever the id contains.
        data2 = {
            'mkbl_data': json.dumps({
                'uClassList': {
                    'model': 'Homepage',
                    'action': 'BuildHome',
                    'parameters': {'id': str(category_id)},
                },
            }),
        }
        response = s.post(url=url, data=data2, verify=False, timeout=30)
        response.raise_for_status()
        respon = response.json()
        print(respon)

        rows = []
        for section in respon['result']['uClassList']['mkbl_data']:
            # Only sections that carry a 'content' entry list products.
            if 'content' not in section:
                continue
            for product in section['mkbl_data']:
                try:
                    row = (category_name,) + _parse_product(product)
                except (KeyError, TypeError, ValueError):
                    # Skip malformed entries only; I/O errors must surface.
                    continue
                print(*row)
                rows.append(row)

        # Append all rows of the category in one go. newline='' is what the
        # csv module requires, and utf-8 keeps the Chinese text portable.
        if rows:
            with open(OUTPUT_DIR + category_name + '.csv', 'a',
                      encoding='utf-8', newline='') as f:
                csv.writer(f).writerows(rows)



  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值