import json
import re

import requests
# HTTP headers sent with every request made by getHTMLText.
# NOTE(review): the cookie values are empty in this file — presumably they are
# meant to be filled in with values from a real browser session; confirm.
head = {
    # Adjacent string literals are concatenated by Python, so the "; " that
    # separates cookie pairs has to be written explicitly; without it the
    # header would be the single malformed pair 'enc=x5sec='.
    'cookie': (
        'enc=; '
        'x5sec='
    ),
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N)',
}
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request is sent with the module-level ``head`` headers and a
    30-second timeout. The body is decoded with the encoding that requests
    detects from the content (``apparent_encoding``).

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection problem, timeout, invalid URL or HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30, headers=head)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "nothing to parse".
        # Only request failures are absorbed, so Ctrl-C and programming
        # errors still surface instead of being silently discarded.
        return ""
    r.encoding = r.apparent_encoding
    return r.text
def parsePage(ilt, html):
    """Extract price/title pairs from page text and append them to ``ilt``.

    The page text is searched for ``"view_price":"..."`` and
    ``"raw_title":"..."`` fragments. The n-th price is paired with the n-th
    title; if one list is shorter, the surplus entries of the other are
    ignored.

    Args:
        ilt: List that receives one ``[price, title]`` entry per item.
            Both values are strings.
        html: Page text to scan. Empty or ``None`` input adds nothing.

    Returns:
        None. ``ilt`` is modified in place.
    """
    if not html:
        return
    # Capture groups pull the values out directly. The scraped text is never
    # passed to eval(), and a ':' inside a title no longer truncates it.
    prices = re.findall(r'"view_price":"([\d.]*)"', html)
    # The title pattern accepts backslash escapes, so an escaped quote
    # inside a title does not end the match early.
    raw_titles = re.findall(r'"raw_title":"((?:[^"\\]|\\.)*)"', html)
    for price, raw_title in zip(prices, raw_titles):
        try:
            # Decode escapes such as \uXXXX or \" the way a JSON parser would.
            title = json.loads('"' + raw_title + '"')
        except ValueError:
            # Not a valid JSON string body; keep the text exactly as found.
            title = raw_title
        ilt.append([price, title])
def printGoodsList(ilt):
    """Print the collected goods as a tab-separated, numbered table.

    A header row is printed first, followed by one row per entry of ``ilt``
    numbered from 1. Each entry supplies the price at index 0 and the title
    at index 1.

    Args:
        ilt: Sequence of entries as produced by parsePage.
    """
    print(f"{'序号':4}\t{'价格':8}\t{'商品名称':16}")
    for rank, entry in enumerate(ilt, start=1):
        price_text = entry[0]
        title_text = entry[1]
        print(f"{rank:4}\t{price_text:8}\t{title_text:16}")
def main(goods="书包", depth=2):
    """Fetch search-result pages for a keyword and print the goods found.

    For each page the search URL is built, downloaded with getHTMLText and
    scanned with parsePage. The accumulated list is then printed with
    printGoodsList.

    Args:
        goods: Search keyword appended to the query string.
        depth: Number of result pages to request.
    """
    # The `s` query value advances by 44 for each page.
    # NOTE(review): presumably 44 is the number of results per page — confirm.
    page_step = 44
    start_url = "https://s.taobao.com/search?q=" + goods
    infoList = []
    for i in range(depth):
        try:
            url = start_url + "&s=" + str(page_step * i)
            html = getHTMLText(url)
            parsePage(infoList, html)
        except Exception:
            # Best-effort crawl: a page that cannot be processed is skipped.
            # Catching Exception rather than everything lets Ctrl-C and
            # SystemExit stop the run.
            continue
    printGoodsList(infoList)
# Run the crawler only when this file is executed as a script, so importing
# the module does not trigger network requests.
if __name__ == "__main__":
    main()
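# Python淘宝比价定向爬虫 (Python Taobao price-comparison targeted crawler)
# 最新推荐文章于 2024-04-08 16:48:07 发布 (web-page residue: "latest recommended article published 2024-04-08 16:48:07")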