Python 爬虫:天猫超市

# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import TimeoutException
import time

s = time.time()  # script start time; used at the end of the run to report elapsed seconds

# Launch a visible Chrome window.  A headless run is possible by building
# webdriver.ChromeOptions() with the 'headless', 'no-sandbox' and
# 'disable-dev-shm-usage' arguments and handing it to webdriver.Chrome.
browser = webdriver.Chrome()
# Shared explicit wait: every element lookup in this script gets up to 10 seconds.
wait = WebDriverWait(browser, 10)

browser.get('https://www.tmall.com/')
# NOTE: an earlier revision injected saved cookies at this point (dropping each
# cookie's 'expiry' key before calling browser.add_cookie); that step is disabled.

# Snapshot the open windows BEFORE clicking, so the window opened by the click
# can be identified reliably instead of assuming it is at index 1.
tmall0 = browser.current_window_handle
handles0 = browser.window_handles

# First link of the home page's main navigation bar
# (presumably the Tmall Supermarket entry -- TODO confirm against the live page).
tmiu = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#content > div.main-nav > div > div > div > a:nth-child(1) > img')))
tmiu.click()

# The click is expected to open a new window.  Wait for it to exist before
# reading the handle list; reading it immediately could race with the browser
# and fail with IndexError.
wait.until(EC.new_window_is_opened(handles0))
list_0 = [handle for handle in browser.window_handles if handle not in handles0][0]
browser.switch_to.window(list_0)

# Type the search keyword into the search box and submit the form.
search_box = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#mq')))
search_box.send_keys('大米')

button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#mallSearch > form > fieldset > div > button')))
button.click()

# Third entry of the sort bar (presumably "sort by sales" -- TODO confirm).
by = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#filterForm > div > ul.filter-sort > li:nth-child(3) > a > span')))
by.click()

# Handle of the search-result window; list_click() returns to it after each product.
tmall = browser.current_window_handle


def _write_detail(txt):
    """Print and record the fields of the product detail page in focus.

    Reads title, price, sales count and review count from the current
    window and appends them to *txt* as ``label:value`` pairs.  The first
    three are terminated by ``~``, the last one by a newline that ends the
    record.

    Args:
        txt: writable text file object receiving the record fields.

    Raises:
        TimeoutException: if one of the elements does not become clickable
            within the shared wait's timeout.
    """
    fields = (
        ('title:{}', '#J_DetailMeta > div.tm-clear > div.tb-property > div > div.tb-detail-hd > h1', "~"),
        ('price:{}', '#J_PromoPrice > dd > div > span', "~"),
        ('num:{}', '#J_DetailMeta > div.tm-clear > div.tb-property > div > ul > li.tm-ind-item.tm-ind-sellCount > div > span.tm-count', "~"),
        ('评价:{}', '#J_ItemRates > div > span.tm-count', "\n"),
    )
    for template, css, terminator in fields:
        element = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, css)))
        value = element.text
        print(value)
        txt.write(template.format(value) + terminator)


def list_click(i):
    """Scrape one product of the result list and append it to the output file.

    Reads the total sales figure and the link of the (i+1)-th item of
    ``#J_ProductList`` in the search-result window, opens the product in its
    own tab, records the detail-page fields, then closes that tab and
    returns focus to the search-result window.

    One line is appended to '03天猫获取大米销量数据.txt' per call.  When an
    element cannot be found in time, the line is cut short and terminated
    with a bare newline instead of raising.

    Args:
        i: zero-based index of the product within the result list.
    """
    item_css = '#J_ProductList > li:nth-child({})'.format(i+1)
    # The context manager closes the file on every exit path.  The encoding is
    # pinned so the Chinese labels are written the same way on every platform
    # (NOTE: a file produced earlier with a different default encoding would
    # end up with mixed encodings).
    with open('03天猫获取大米销量数据.txt', "a", encoding="utf-8") as txt:
        try:
            browser.switch_to.window(tmall)
            link = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, item_css + ' > div > h3 > a')))
            tnum = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, item_css + ' > div > div.item-summary > div.item-sum > strong')))

            # Collect the list-page fields BEFORE clicking, so a timeout here
            # cannot leave a freshly opened detail tab behind.
            total_sales = tnum.text
            url = link.get_attribute('href')
            print('总销量:{}'.format(total_sales))
            txt.write('总销量:{}'.format(total_sales)+"~")
            print(url)
            txt.write('href:{}'.format(url)+"~")

            # Identify the detail tab as "the handle that was not there
            # before the click" rather than relying on handle ordering.
            handles_before = browser.window_handles
            link.click()
            wait.until(EC.new_window_is_opened(handles_before))
            detail_tab = [h for h in browser.window_handles if h not in handles_before][-1]
            browser.switch_to.window(detail_tab)
            try:
                _write_detail(txt)
            finally:
                # Always close the detail tab, including on timeout, so
                # tabs do not pile up over a long run.
                browser.close()
        except TimeoutException:
            # Terminate the (possibly partial) record so the next product
            # starts on its own line.
            txt.write('\n')
        finally:
            browser.switch_to.window(tmall)
# Scrape up to three result pages, then report the elapsed time.
try:
    for n in range(3):
        # list_click(i) targets the (i+1)-th list item, so indices 8 and 9
        # select the 9th and 10th products of the current page.
        for i in range(8,10):
            list_click(i)
            time.sleep(1)  # short pause between products

        try:
            button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#content > div.main > div > div.list-bottom > div > div > a.page-next')))
        except TimeoutException:
            # No clickable "next page" link: stop paginating instead of
            # aborting the whole run with a traceback.
            break
        button.click()
finally:
    # Shut down the browser and its driver process even if scraping failed,
    # so no orphaned Chrome instance is left behind.
    browser.quit()
e = time.time()
print("用时{}".format(e-s))

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值