- 登录淘宝,进入搜索页,F12
- 选择Network,刷新一下,找到最上方以search?开头的文件,右键
- 选择copy,copy as cURL(bash)
![copy](https://img-blog.csdnimg.cn/20200524140052333.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM4Njg5Mzk1,size_16,color_FFFFFF,t_70)
- 打开 https://curl.trillworks.com/ ,将上一步复制的内容粘贴到curl command窗口
![复制cookie](https://img-blog.csdnimg.cn/20200524140336490.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM4Njg5Mzk1,size_16,color_FFFFFF,t_70)
- 复制右侧的headers内容,在程序中用变量headers保存,作为参数传给
requests.get(url,headers=headers)
import json
import re
from urllib.parse import quote

import requests
# HTTP request headers sent with every search request.
# 'cookie' is left empty here; fill it with the cookie value copied from a
# logged-in browser session before running the script.
headers = {
    'cookie': '',
    'User-Agent': 'Mozilla/5.0',
}
def getHtmlText(url):
    """Download *url* and return the response body as text.

    The request is sent with the module-level ``headers`` and a 30 second
    timeout. The body is decoded with the encoding that requests detects
    from the content (``apparent_encoding``).

    Args:
        url: Absolute URL to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or a non-2xx status). A failure also
        prints "获取失败".
    """
    try:
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are handled here, so Ctrl-C and
        # programming errors are no longer silently swallowed.
        print("获取失败\n")
        return ""
def parsePage(ilt, html):
    """Extract (price, title) pairs from a search result page into *ilt*.

    The page is scanned for ``"view_price":"..."`` and ``"raw_title":"..."``
    fragments. Prices and titles are paired by position; if the two lists
    differ in length the surplus entries are ignored.

    Args:
        ilt: List that receives one ``[price, title]`` entry per item; it is
            mutated in place. Both values are strings.
        html: Page text to scan. Anything that is not a string (for example
            ``None`` from a failed download) is reported with "解析失败" and
            leaves *ilt* untouched.

    Returns:
        None.
    """
    if not isinstance(html, str):
        print("解析失败\n")
        return

    # Capture groups pull out the values directly, so there is no need to
    # split on ':' (which broke on titles containing a colon) or to eval()
    # text that came from the network.
    prices = re.findall(r'"view_price":"([\d.]*)"', html)
    titles = re.findall(r'"raw_title":"((?:[^"\\\n]|\\.)*)"', html)

    for price, raw_title in zip(prices, titles):
        try:
            # Decode JSON escapes such as \uXXXX or \" without executing code.
            title = json.loads('"' + raw_title + '"')
        except ValueError:
            # Not a valid JSON string body; keep the raw text rather than
            # dropping the item.
            title = raw_title
        ilt.append([price, title])
def printGoodsList(ilt):
    """Print the collected goods as a tab-separated table.

    A header row is printed first, followed by one row per entry with a
    1-based running index.

    Args:
        ilt: Iterable of ``[price, title]`` entries, as filled in by
            ``parsePage``.

    Returns:
        None.
    """
    tplt = "{:4}\t{:8}\t{:16}"
    print(tplt.format("序号", "价格", "商品名称"))
    # enumerate replaces the hand-maintained counter; numbering starts at 1.
    for count, g in enumerate(ilt, start=1):
        print(tplt.format(count, g[0], g[1]))
def main(goods='沙发', depth=3):
    """Search Taobao for *goods* and print the prices and titles found.

    Args:
        goods: Search keyword. It is percent-encoded before being placed in
            the query string, so spaces and characters such as '&' are safe.
        depth: Number of result pages to fetch.

    Returns:
        None. The results are printed by ``printGoodsList``.
    """
    start_url = 'https://s.taobao.com/search?q=' + quote(goods)
    infoList = []
    for page in range(depth):
        try:
            # The 's' parameter is the result offset; it advances by 44 per
            # page (presumably the number of items per result page).
            url = start_url + '&s=' + str(44 * page)
            html = getHtmlText(url)
            parsePage(infoList, html)
        except Exception:
            # Best effort: a page that fails is skipped. Catching Exception
            # rather than everything lets Ctrl-C stop the run.
            continue
    printGoodsList(infoList)
# Run the scraper immediately; there is no __main__ guard, so this also runs
# when the file is imported.
main()
![示例](https://img-blog.csdnimg.cn/20200524140601289.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM4Njg5Mzk1,size_16,color_FFFFFF,t_70)