亚马逊商品排名采集

# -*- coding:utf-8 -*-
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from lxml import etree

# HTTP headers sent with every request made by this script: a desktop Chrome
# User-Agent string plus a raw cookie header.
# NOTE(review): the cookie value looks like a captured amazon.es browser
# session (session-id, session-token, at-acbes, ...). Hard-coding it in source
# exposes those credentials, and it will presumably stop working once the
# session expires -- confirm, and consider loading it from the environment.
headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'cookie': 'session-id=259-8860287-8140938; ubid-acbes=261-0942761-3510766; session-token="k8EXd2cEi7hIOL5Utm3BpSFGnJtLgdr5jf8glwnph/e1mNYlDq2xBo2TpAQIHRJD6ixOR79PSjpW0WEgKYtXm7ykLn80mONCiohcbWqxxY5lgMSixkr0YfVt/3gcIF/fmgW/nP2m5+JI4y0niw2pDO+vIBta0ZePT0pM628Twd2YGAeV9LT4YNnxWiwnGE3RllYy0uqblEf/UTv3srM2BaDhgK1iHml4mzh/RuYrSZRfx2rW8bJtseL3hIOC25b8UAVb7PwLLx71o4P5K3pp3w=="; x-acbes="EJ7pWnVv?jVWDEoFLrKvOkfI6uFuCl84nicx6VkGPDxb5TNHo2i?cNO5LoQa5fVJ"; at-acbes=Atza|IwEBIBXbH2_0CBP1lAGo49GoCWO_oVT7XkPSHBIviw8iFuBibE_W7pLKNxXuHG-6JPt4S0kcmFpICkjQOLY40QxGuEJvqEjBzPs37PQHg1Md3NnIVegMJHvH3lvW6B5qTV7AjNnFzmnrg88jIyDtDwTVxoNasZE_3JY0DnGPtx24bTJEI-9oE2OJUWw_rCJXrVByN8hApVFhPTISlcWgkAYFkpMf; sess-at-acbes="5oz0+U2/hWIABtUSuwK0QquWGCZbGRPb0Oc+58NsjBk="; sst-acbes=Sst1|PQGOWbZA571Na8DuwLGQtUCsC4_sdNTJ6Tnaz80HTBqH30lQ-0V53D9vCPVrhTY2Ah5uXLrOEmMu5SJTIXY9N7jmgLjQwjyfSJlluZiBnb5MjfHRtp_J_auyHwtjgf0WQrrJWtCABORSMzJZE4qDQ0doRH4Jw9fIFwryvPPYahXeF_vkByBDtpKjD0sM2ENl5_tR6RTZNN8id8eNvKosw2KNZ7RG5okVvjAlm-lTogZ7Nj4gzR7YBZLwNh9dWzeYQrfF4vrWrX1EAjcrwhI0XuWd5O32SxBa_Ach_eCgXq3Na1jCiY8s66QFWz20E9aZpaIm5jAurBmiC2SDuOrcuPUAng; lc-acbes=es_ES; i18n-prefs=EUR; session-id-time=2082758401l; csm-hit=tb:s-B8YZ67YMWMFTGFWXCXAH|1597392781562&t:1597392786984&adb:adblk_no'

    }
# Listing page to crawl: every product link found on it is visited and its
# sales-rank details are appended to 50.txt.
url = 'https://www.amazon.es/s?me=A176UH6T0U3XNJ&marketplaceID=A1RKKUPIHCS9HS'

# Base used to resolve the hrefs found on the listing page.
BASE_URL = 'https://www.amazon.es/'

# Seconds to wait for a response; without a timeout requests blocks forever
# on a stalled connection.
REQUEST_TIMEOUT = 30

# NOTE(review): both pages are fetched with POST, as in the original script.
# A plain GET is the usual method for these pages -- confirm before changing.
res = requests.post(url=url, headers=headers, timeout=REQUEST_TIMEOUT).text
etree_html = etree.HTML(res)
# Relative or absolute links to the individual product pages.
src = etree_html.xpath('//div[@class="a-row"]/a/@href')

with open('50.txt', "a", encoding='utf-8') as file:
    for i in src:
        # urljoin copes with root-relative ("/dp/...") and absolute hrefs
        # alike, instead of producing "amazon.es//dp/..." or a doubled host.
        detail = urljoin(BASE_URL, i)

        try:
            detail_res = requests.post(
                url=detail, headers=headers, timeout=REQUEST_TIMEOUT
            ).text
        except requests.RequestException as exc:
            # One unreachable product page must not abort the whole crawl.
            print('抓取失败', detail, exc)
            continue

        etree_detail = etree.HTML(detail_res)
        ranking_content = etree_detail.xpath('//*[@id="SalesRank"]/td[1]//text()')
        ranking_list = etree_detail.xpath('//*[@id="SalesRank"]/td[2]/ul/li//text()')
        rankging_date = etree_detail.xpath(
            '//*[@id="prodDetails"]/div[2]/div[2]/div[1]/div[2]/div/div/table/tbody/tr[5]/td[2]//text()'
        )

        # The record below reads ranking_content[0], ranking_list[1], [3], [4]
        # and rankging_date[0]; skip pages that lack any of them rather than
        # crashing with IndexError.
        if not ranking_content or len(ranking_list) < 5 or not rankging_date:
            print('跳过页面(缺少排名数据)', detail)
            continue

        # One record per product: URL, rank label, three rank text nodes and
        # the detail-table cell, followed by a blank separator line.
        write_content = '\n'.join([
            detail,
            ranking_content[0],
            ranking_list[1],
            ranking_list[3],
            ranking_list[4],
            rankging_date[0],
        ]) + '\n\n'
        file.write(write_content)

        print('抓取页面', detail)


 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值