python抓取淘宝商品_python爬取淘宝商品信息

# -*- coding: utf-8 -*-

import requests

# import bs4

import re

import json

def open(keywords, page):
    """Request one page of Taobao search results ordered by sales.

    NOTE: this function shadows the built-in ``open`` for the rest of the
    module; the name is kept because callers in this file use it.

    Args:
        keywords: Search text, sent as the ``q`` query parameter.
        page: 1-based page number; the ``s`` offset sent to the server is
            ``(page - 1) * 44``.

    Returns:
        The response object returned by ``requests.get``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
    }
    payload = {'q': keywords, 'sort': "sale-desc", 's': (page - 1) * 44}
    url = "https://s.taobao.com/search"

    # Resulting URL looks like:
    # https://s.taobao.com/search?q=羽绒服&sort=sale-desc&s=44
    #
    # The headers were previously built but never sent; pass them so the
    # request actually carries the browser User-Agent. The timeout keeps a
    # stalled connection from blocking the script forever.
    res = requests.get(url, params=payload, headers=headers, timeout=10)
    return res

def get_item(res):
    """Extract the item records embedded in a search-result page.

    The page text is expected to contain a ``g_page_config = {...};``
    assignment whose JSON value holds the item list under
    ``mods -> itemlist -> data -> auctions``.

    Args:
        res: Object with a ``text`` attribute holding the page source.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``link``,
        ``price``, ``sale`` and ``shoper`` (ID, title, link, price,
        sales figure and seller), one per item.

    Raises:
        ValueError: If no ``g_page_config`` assignment is found in the text.
        KeyError: If the decoded JSON lacks one of the expected keys.
    """
    g_page_config = re.search(r'g_page_config = (.*?);\n', res.text)
    if g_page_config is None:
        # Without this check the failure surfaces as an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError("g_page_config not found in response text")

    page_config_json = json.loads(g_page_config.group(1))
    page_item = page_config_json['mods']['itemlist']['data']['auctions']

    # Keep only the fields we care about for each item.
    return [
        {
            'id': each['nid'],
            'title': each['title'],
            'link': each['detail_url'],
            'price': each['view_price'],
            'sale': each['view_sales'],
            'shoper': each['nick'],
        }
        for each in page_item
    ]

def count_sales(items, keyword=u'黑色'):
    """Sum the sales figures of items whose title contains ``keyword``.

    Args:
        items: Iterable of dicts with at least ``title`` and ``sale`` keys.
        keyword: Text that must appear in an item's title for the item to
            be counted. Defaults to the value that was previously
            hard-coded.

    Returns:
        The integer total of the first run of digits found in each matching
        item's ``sale`` string. Items whose ``sale`` string has no digits
        are skipped.
    """
    count = 0
    for each in items:
        if keyword not in each['title']:
            continue
        # NOTE(review): only the first run of digits is used, so a value
        # written with a decimal point or a unit multiplier would be
        # under-counted -- confirm the format of the 'sale' field.
        digits = re.search(r'\d+', each['sale'])
        if digits is None:
            # No number to add; previously this crashed on .group().
            continue
        count += int(digits.group())
    return count

def main():
    """Prompt for a search keyword, fetch result pages and print sales.

    For each of ``length`` pages the extracted items are printed, and the
    sales counted by ``count_sales`` are accumulated; the grand total is
    printed once all pages have been processed.
    """
    # raw_input exists only on Python 2; fall back to input on Python 3 so
    # the script runs on both.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    keywords = read_line("请输入搜索关键词:")  # any product name, e.g. 羽绒服
    length = 2  # number of Taobao result pages to fetch
    total = 0

    for each in range(length):
        res = open(keywords, each + 1)  # pages are 1-based
        items = get_item(res)
        # Call-style print with a single argument behaves the same on
        # Python 2 and 3.
        print(items)
        total += count_sales(items)  # running sales total

    print(total)

# Run the scraper only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值