scrapy——京东

最新推荐文章于 2024-05-02 17:41:07 发布

(๑• . •๑)框

最新推荐文章于 2024-05-02 17:41:07 发布

阅读量106

点赞数

分类专栏： scrapy学习路径

本文链接：https://blog.csdn.net/weixin_45051787/article/details/104882438

版权

scrapy学习路径专栏收录该内容

4 篇文章 0 订阅

订阅专栏

import scrapy
import json


class CatalogSpider(scrapy.Spider):
    """Fetch the category feed at ``dc.3.cn`` and print it as a nested tree.

    The response body is decoded as GBK and parsed as JSON.  Every node read
    from the feed carries an ``'n'`` string of ``'|'``-separated fields (the
    first is used as the URL, the second as the title) and an ``'s'`` list
    holding its child nodes.
    """

    name = 'catalog'
    allowed_domains = ['3.cn']
    start_urls = ['https://dc.3.cn/category/get']

    @staticmethod
    def _split_entry(raw):
        """Return ``(url, title)`` taken from a ``'url|title|...'`` string.

        Raises:
            IndexError: if ``raw`` contains no ``'|'`` separator.
        """
        fields = raw.split('|')
        return fields[0], fields[1]

    def parse(self, response):
        """Build the three-level category tree from ``response`` and print it.

        The entries of the top-level ``'data'`` list are only containers:
        the roots of the printed tree are the nodes in their ``'s'`` lists.
        Root and middle nodes are dicts with ``"url"``, ``"title"`` and
        ``"child"`` keys; leaf nodes only have ``"url"`` and ``"title"``.

        Args:
            response: the downloaded response; only ``response.body``
                (bytes) is read.
        """
        # Decode explicitly: json.loads() no longer accepts an ``encoding``
        # keyword (removed in Python 3.9), and it was ignored for str input.
        jd_json = json.loads(response.body.decode('gbk'))

        result = []
        for group in jd_json['data']:
            for top in group['s']:
                top_url, top_title = self._split_entry(top['n'])
                top_node = {"url": top_url, "title": top_title, "child": []}

                for mid in top['s']:
                    mid_url, mid_title = self._split_entry(mid['n'])
                    mid_node = {"url": mid_url, "title": mid_title, "child": []}

                    for leaf in mid['s']:
                        leaf_url, leaf_title = self._split_entry(leaf['n'])
                        mid_node["child"].append({
                            "url": leaf_url,
                            "title": leaf_title,
                        })

                    # Attach each node to its parent exactly once, after its
                    # own children are collected, so nothing is duplicated.
                    top_node["child"].append(mid_node)

                result.append(top_node)
        print(result)