import urllib.request
import urllib.parse
import urllib.error
import json
import jsonpath
import pandas as pd
import time
# Scrape a paginated product listing via POST requests and save it as a CSV.
#
# For every page the script posts a small query form, decodes the JSON reply,
# pulls seven fields out of it with JSONPath, and accumulates one row per
# product.  All rows are written to a single CSV file at the end.

url = "***"  # Request URL (can be found by inspecting the traffic in Fiddler).
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"}
name = ['Id', '类名', '商品名', '积分', '价格', '描述', '库存']  # Output column names.

# JSONPath expression that supplies the values of each output column.
# "$.." is recursive descent: it matches the key at any depth of the reply.
_FIELD_PATHS = {
    'Id': '$..KeyId',
    '类名': '$..brief',
    '商品名': '$..name',
    '积分': '$..max_point',
    '价格': '$..max_price',
    '描述': '$..recommendDesc',
    '库存': '$..quantity',
}


def _fetch_page(page):
    """POST the query form for one page and return the decoded JSON reply.

    Args:
        page: Page number sent in the form's "page" field.

    Returns:
        The Python object produced by ``json.loads`` on the response body.

    Raises:
        urllib.error.URLError: If the request fails.
        ValueError: If the response body is not valid JSON.
    """
    # The original form literal listed "brand_id" twice ("0", then ""); in a
    # dict literal the last value wins, so "" is what was actually sent and is
    # kept here.  NOTE(review): confirm which value the endpoint expects.
    formdata = {
        "brand_id": "",
        "category2_id": "0",
        "category3_id": "0",
        "request": "",
        "page": page,
    }
    mydata = urllib.parse.urlencode(formdata).encode("utf-8")
    # Supplying a data payload makes urllib issue a POST request.
    req = urllib.request.Request(url, mydata, headers)
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())


def _extract_rows(jsonobj, page):
    """Turn one decoded page into a list of row dicts keyed by column name.

    The number of rows equals the number of '$..name' matches, as in the
    original script; the i-th match of every field forms the i-th row.

    Args:
        jsonobj: Decoded JSON reply for the page.
        page: Page number, used only in the error message.

    Returns:
        A list of dicts, one per product; empty when no names matched.

    Raises:
        ValueError: If any field has fewer matches than there are names, since
            the rows could not be assembled without misaligning the data.
    """
    # jsonpath.jsonpath returns False (not an empty list) when nothing
    # matches; normalise that to [] so len() and indexing stay valid.
    matches = {
        column: jsonpath.jsonpath(jsonobj, path) or []
        for column, path in _FIELD_PATHS.items()
    }
    row_count = len(matches['商品名'])
    too_short = [column for column in name if len(matches[column]) < row_count]
    if too_short:
        raise ValueError(
            "page %d: fields %r have fewer matches than the %d names found"
            % (page, too_short, row_count)
        )
    return [
        {column: matches[column][i] for column in name}
        for i in range(row_count)
    ]


# Collect plain dicts and build the DataFrame once at the end: DataFrame.append
# was removed in pandas 2.0 and copied the whole frame on every call.
rows = []
for n in range(1, 416):  # pages 1..415
    rows.extend(_extract_rows(_fetch_page(n), n))
    # Pause between page requests.  NOTE(review): the original indentation was
    # lost; sleeping once per page (not once per row) is assumed.
    time.sleep(5)

# dtype=object keeps every value as the Python object it was decoded to, as the
# row-by-row append onto an empty frame did, so the CSV text should not change.
df = pd.DataFrame(rows, columns=name, dtype=object)
df.to_csv("D://python/dh/dh1.csv", index=False, encoding="gb18030")
# An excerpt of the data in the resulting file is shown in the figure below: