import scrapy
import json
class CatalogSpider(scrapy.Spider):
    """Fetch the category JSON listed in ``start_urls`` and emit it as a tree.

    The payload is read as ``{"data": [group, ...]}`` where every node may
    carry a child list under ``"s"`` and a pipe-separated name under ``"n"``
    whose first field is used as the URL and second field as the title.
    NOTE(review): this schema is inferred from the keys the code reads;
    confirm against a live response.
    """

    name = 'catalog'
    allowed_domains = ['3.cn']
    start_urls = ['https://dc.3.cn/category/get']

    # Levels that receive a "child" list below them: the top-level entry and
    # its direct children. Entries on the third level are leaves and carry
    # only "url" and "title".
    _NESTED_LEVELS = 2

    def parse(self, response):
        """Decode the GBK response body and yield one dict per top category.

        Each yielded dict has the shape
        ``{"url": str, "title": str, "child": [{"url", "title", "child": [
        {"url", "title"}, ...]}, ...]}``.

        Raises:
            UnicodeDecodeError: if the body is not valid GBK.
            json.JSONDecodeError: if the decoded body is not valid JSON.
            KeyError: if the payload has no ``"data"`` key or a node lacks
                ``"n"``.
        """
        # Decode explicitly, then parse: json.loads() no longer accepts an
        # ``encoding`` keyword (removed in Python 3.9).
        payload = json.loads(response.body.decode('gbk'))

        for group in payload['data']:
            # The groups under "data" are containers only; their "s"
            # children are the first level that is emitted.
            for top in group.get('s') or ():
                yield self._build_node(top, self._NESTED_LEVELS)

    @staticmethod
    def _split_name(raw):
        """Return ``(url, title)`` taken from the first two ``|`` fields."""
        fields = raw.split('|')
        # Tolerate a name without a separator instead of raising IndexError.
        title = fields[1] if len(fields) > 1 else ''
        return fields[0], title

    @classmethod
    def _build_node(cls, node, levels_below):
        """Convert one raw node, nesting ``levels_below`` levels of children."""
        url, title = cls._split_name(node['n'])
        entry = {"url": url, "title": title}
        if levels_below > 0:
            # Each child is added exactly once.
            entry["child"] = [
                cls._build_node(child, levels_below - 1)
                for child in node.get('s') or ()
            ]
        return entry
# scrapy -- JD.com (京东)
# Source note: latest recommended article published 2024-05-02 17:41:07