淘宝商品价格对比
实例:在淘宝上搜索某个商品,抓取搜索结果中各商品的价格和名称并列表显示;抓取的页数可以自定义。
代码:
# 淘宝商品价格对比爬虫
import requests
import re
def get_all_http(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, invalid URL, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "no data for this page".
        return ""
    # ``encoding`` is the attribute ``r.text`` consults when decoding; the
    # previous ``r.coding`` assignment only created an unused attribute, so
    # the detected encoding was never applied.
    r.encoding = r.apparent_encoding
    return r.text
def analyse_my_data(data, lst1):
    """Extract price/title pairs from page text and append them to *lst1*.

    The page text is searched for ``"view_price":"<digits and dots>"`` and
    ``"raw_title":"<text>"`` fragments. The n-th price is paired with the
    n-th title; if one kind occurs more often than the other, the surplus
    entries are ignored.

    Args:
        data: Page text to scan. Empty or missing input yields no new rows.
        lst1: List that receives one ``[price, title]`` entry per item.
            Both values are strings.

    Returns:
        The same *lst1* object, with the new entries appended.
    """
    import json

    if not data:
        return lst1

    # Capture groups pull out the quoted value directly. This avoids running
    # eval() on scraped content and avoids splitting on ':' (which truncated
    # any title that itself contained a colon).
    prices = re.findall(r'"view_price":"([\d.]*)"', data)
    titles = re.findall(r'"raw_title":"(.*?)"', data)

    for price, raw_title in zip(prices, titles):
        try:
            # Decode escape sequences such as \uXXXX inside the title.
            title = json.loads('"' + raw_title + '"')
        except ValueError:
            # Not a valid JSON string body; keep the text exactly as found.
            title = raw_title
        lst1.append([price, title])
    return lst1
def print_data(lst1):
    """Print a header line followed by one numbered line per entry of *lst1*.

    Args:
        lst1: Iterable of entries whose element 0 is shown in the price
            column and element 1 in the item column. Rows are numbered
            starting at 1.
    """
    row_template = "{0:^10}\t{1:^10}\t{2:{3}^10}"
    # chr(12288) is the full-width ideographic space (U+3000); it is passed
    # as the fill character for the centred third column.
    wide_space = chr(12288)
    print(row_template.format("序号", "价格", "商品", wide_space))
    for index, entry in enumerate(lst1, start=1):
        print(row_template.format(index, entry[0], entry[1], wide_space))
def main(deepth):
    """Crawl *deepth* search-result pages and print every item found.

    Each page is downloaded and parsed in turn; the items from all pages are
    collected into a single list and printed once as one numbered table.
    Pages that could not be fetched or parsed contribute no rows.

    Args:
        deepth: Number of result pages to crawl, starting from the first.
    """
    # URL pattern of the result pages: the first page has no ``s`` parameter,
    # the second uses ``s=44``, and ``s`` advances by 44 for each further page.
    url0 = "https://s.taobao.com/search?q=mac+pro+2017&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20180615&ie=utf8"
    url1 = "https://s.taobao.com/search?q=mac+pro+2017&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20180615&ie=utf8&bcoffset=4&p4ppushleft=1%2C48&s=44&ntoffset=4"
    url2 = "https://s.taobao.com/search?q=mac+pro+2017&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20180615&ie=utf8&bcoffset=4&p4ppushleft=1%2C48&ntoffset=4&s="

    items = []
    for i in range(deepth):
        if i == 0:
            url = url0
        elif i == 1:
            url = url1
        else:
            # Must assign ``url`` here: assigning ``url1`` left ``url``
            # pointing at the second page for every later iteration.
            url = url2 + str(i * 44)
        # Download the page.
        data = get_all_http(url)
        # Parse it into [price, title] rows; a falsy result means nothing
        # usable was extracted for this page.
        page_items = analyse_my_data(data, [])
        if page_items:
            items.extend(page_items)
    # Show prices and titles for all crawled pages.
    print_data(items)
if __name__ == "__main__":
    # Crawl five result pages when executed as a script; the guard keeps a
    # plain import of this module from starting network requests.
    main(5)