废话少说直接上代码:
import json
import re
from urllib.parse import quote

import requests
def getHtml(url, timeout=30):
    """Fetch *url* with HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            argument (seconds). Defaults to 30, the value that used to be
            hard-coded.

    Returns:
        The decoded page text, or an empty string when the request fails
        or the server answers with an error status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "no page available".
        # Only request-related failures are swallowed, so Ctrl-C and
        # programming errors still surface.
        return ""
    # Decode using the encoding detected from the body content.
    response.encoding = response.apparent_encoding
    return response.text
# "view_price":"<digits and dots>" -- the price text is captured verbatim.
_PRICE_RE = re.compile(r'"view_price":"([\d.]*)"')
# "raw_title":"<string body>" -- the body may contain backslash escapes
# (including an escaped quote), so it is matched one unit at a time.
_TITLE_RE = re.compile(r'"raw_title":"((?:[^"\\]|\\.)*)"')


def _decode_title(raw):
    """Return *raw* with JSON string escapes decoded, or *raw* unchanged."""
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        # Not a valid JSON string body; keep the text exactly as scraped.
        return raw


def parserPage(lis, html):
    """Extract (price, title) pairs from *html* and append them to *lis*.

    The page text is searched for ``"view_price":"..."`` and
    ``"raw_title":"..."`` fields. The n-th price is paired with the n-th
    title; if the two counts differ, the surplus entries are ignored.

    Args:
        lis: List that receives one ``[price, title]`` entry per item.
            Both elements are strings; the price is not converted to a
            number.
        html: Page text to scan. An empty string yields no entries.

    Returns:
        None. Results are delivered by mutating *lis*.
    """
    if not html:
        return
    prices = _PRICE_RE.findall(html)
    # Titles are decoded as JSON string literals instead of being passed to
    # eval(): scraped page content should never be executed, and splitting
    # on ':' used to cut off any title that itself contained a colon.
    titles = [_decode_title(raw) for raw in _TITLE_RE.findall(html)]
    lis.extend([price, title] for price, title in zip(prices, titles))
def printGoods(lis):
    """Print the collected goods as a tab-separated table on stdout.

    A header row is printed first, then one row per entry with a running
    number starting at 1, and finally a single blank line.

    Args:
        lis: Sequence of entries whose first element is the price and whose
            second element is the title, as appended by ``parserPage``.
    """
    row_format = "{:4}\t{:8}\t{:16}"
    print(row_format.format("序号", "价格", "商品名称"))
    for number, entry in enumerate(lis, start=1):
        print(row_format.format(number, entry[0], entry[1]))
    print()
# Offset step between consecutive result pages; taken from the pagination
# expression (44 * page index) that was previously commented out.
_ITEMS_PER_PAGE = 44


def main(goods="书包", counts=2):
    """Search Taobao for *goods*, scrape *counts* result pages and print them.

    Each page is requested with an ``s`` offset of ``44 * page_index`` so
    that successive iterations fetch different pages rather than the same
    hard-coded URL repeatedly.

    Args:
        goods: Search keyword. It is percent-encoded before being placed in
            the query string.
        counts: Number of result pages to fetch.
    """
    start_url = "https://s.taobao.com/search?q=" + quote(goods)
    infoList = []
    for page in range(counts):
        html = getHtml(start_url + "&s=" + str(_ITEMS_PER_PAGE * page))
        if not html:
            # getHtml signals a failed fetch with ""; skip to the next page.
            continue
        parserPage(infoList, html)
    printGoods(infoList)


if __name__ == "__main__":
    main()