抓取某宝所有分类
'''
操作说明:
1、mitmdump -s /Users/fanding/spiderCode/spider-python/po_taobao/taobao_category.py
2、手动点击分类
3、执行 cat category.log | grep category: | awk -F ':' '{print $4}' | tee categoryTem.log | sed 's/-/,/g' > category.csv
'''
import pdb
import json
import logging
# Route every log record (DEBUG and above) to category.log. The shell pipeline
# in the usage notes at the top of this file greps that log for "category:".
logging.basicConfig(filename='/Users/fanding/spiderCode/spider-python/po_taobao/category.log', level=logging.DEBUG)
def response(follow):
    """Log the category names carried by a Taobao category-recommend reply.

    Intended as the ``response`` hook of a script loaded with ``mitmdump -s``.
    Replies whose request URL does not contain the recommend endpoint are
    ignored. For a matching reply, one header line is logged, followed by one
    ``category: <level1>-<level2>-<level3>`` line per third-level item.

    Args:
        follow: Intercepted flow; only ``follow.request.url`` and
            ``follow.response.text`` are read.

    Returns:
        ``follow.response``, unmodified. Kept for backward compatibility;
        whether the caller uses the return value is not visible here.
    """
    m_request = follow.request
    m_response = follow.response

    endpoint = ('https://guide-acs.m.taobao.com/gw/'
                'mtop.relationrecommend.wirelessrecommend.recommend/2.0')
    if endpoint not in m_request.url:
        return m_response

    # Header record describing the column layout of the lines that follow.
    logging.info('category: 一级分类-二级分类-三级分类')

    try:
        categoryData = json.loads(m_response.text)
    except (TypeError, ValueError):
        # Empty or non-JSON body: nothing to extract, but do not break the hook.
        logging.warning('response body is not valid JSON')
        return m_response

    # Tolerate missing keys and explicit nulls at every level of the payload.
    payload = categoryData.get('data') if isinstance(categoryData, dict) else None
    data = payload.get('result') if isinstance(payload, dict) else None
    if not isinstance(data, list) or not data or not isinstance(data[0], dict):
        logging.warning('data is null')
        return m_response

    result = data[0]
    industryId = result.get('industryId')

    # Default to an empty first-level name so that an unmatched industryId no
    # longer leaves the variable unbound when the log lines are emitted.
    categoryOneName = ''
    for industry in result.get('industryList') or []:
        if industry.get('industryId') == industryId:
            categoryOneName = industry.get('industryName')

    for module in result.get('moduleList') or []:
        categoryTwoName = module.get('title')
        for item in module.get('items') or []:
            categoryThreeName = item.get('show_name')
            logging.info('category: %s-%s-%s',
                         categoryOneName, categoryTwoName, categoryThreeName)
    return m_response
前提:已配置好 mitmproxy 证书等环境,能够抓取 App 的数据包。