# -*- coding: utf-8 -*-
"""
Created on Fri Dec 6 09:33:51 2019
@author: 18352
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import TimeoutException
import time
import json
import os
import csv
# Wall-clock start timestamp; it is subtracted from the end timestamp at the
# bottom of the script to print the total run time.
s = time.time()
#class SaveCSV(object):
# def save(self, keyword_list,path, item):
# """
# 保存csv方法
# :param keyword_list: 保存文件的字段或者说是表头
# :param path: 保存文件路径和名字
# :param item: 要保存的字典对象
# :return:
# """
# try:
# # 第一次打开文件时,第一行写入表头
# if not os.path.exists(path):
# with open(path, "w", newline='', encoding='gbk') as csvfile: # newline='' 去除空白行
# writer = csv.DictWriter(csvfile, fieldnames=keyword_list) # 写字典的方法
# writer.writeheader() # 写表头的方法
#
# # 接下来追加写入内容
# with open(path, "a", newline='', encoding='gbk') as csvfile: # newline='' 一定要写,否则写入数据有空白行
# writer = csv.DictWriter(csvfile, fieldnames=keyword_list)
# writer.writerow(item) # 按行写入数据
# print("^_^ write success")
#
# except Exception as e:
# print("write error==>", e)
# # 记录错误数据
# with open("error.txt", "w") as f:
# f.write(json.dumps(item) + ",\n")
# pass
#
#item_list = [
# "title",
# "prcie",
# "sales",
# "评价"
# ]
#t0 = time.strftime("%Y%m%d%H%M")
#path = ".\爬虫结果\销量排行&{}.csv".format(t0)
# --- Browser session setup ---------------------------------------------------
# Start Chrome and open the Tmall Supermarket landing page.
browser = webdriver.Chrome()
#browser = webdriver.PhantomJS()
# Shared explicit wait used by every wait.until(...) call (timeout argument: 10).
wait = WebDriverWait(browser, 10)
browser.get('https://chaoshi.tmall.com')

# Pause for 30 seconds before reading the cookies.
# NOTE(review): presumably this gives the operator time to log in by hand -- confirm.
time.sleep(30)
#print(browser.get_cookies())

# The assignment that used to be here had been reduced to a bare "=", which is
# a syntax error and left `cookies` undefined for the loop below.  Read the
# cookies from the live session instead.
# NOTE(review): the original presumably pasted a hard-coded cookie list here;
# restore that list if the session cookies are not what was intended.
cookies = browser.get_cookies()

for cookie in cookies:
    # Re-add each cookie without its 'expiry' field.
    # NOTE(review): presumably add_cookie() rejected the 'expiry' value with the
    # driver version in use -- confirm.
    cookie.pop('expiry', None)
    browser.add_cookie(cookie)

# Disabled keyword search: type a query into the '#mq' box and click submit.
#input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,'#mq')))
#input.send_keys('大米')
#submit = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#mallSearch > form > fieldset > div > button')))
#submit.click()

# Handle of the window that is current at this point; list_click() switches
# back to it before and after visiting each product.
tmall = browser.current_window_handle
def list_click(i, max_attempts=2):
    """Open one product from the listing window and print its details.

    Clicks the product-image link of the ``i``-th entry under
    ``#J_ProductList`` in the listing window, switches to the window that
    opens, prints the text of four detail-page elements (title, promo price,
    sell count and rating count, going by their selectors), then closes that
    window and returns focus to the listing window.

    Uses the module-level ``browser``, ``wait`` and ``tmall`` objects.

    Args:
        i: 0-based index of the product entry.  CSS ``:nth-child`` is
            1-based, so ``i + 1`` is substituted into the selector.
        max_attempts: How many times to try reading the detail fields when a
            wait times out.  Values below 1 are treated as 1.

    Returns:
        None.  The field texts are printed to stdout; an entry whose link or
        detail fields cannot be located in time is skipped without output.
    """
    browser.switch_to.window(tmall)
    # The title link of the same entry would be
    # '#J_ProductList > li:nth-child(N) > div > h3 > a'; the image link is used here.
    link_css = '#J_ProductList > li:nth-child({}) > div > div.product-img > a'.format(i + 1)
    try:
        link = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, link_css)))
    except TimeoutException:
        # Entry missing or not clickable within the timeout: skip it.
        return
    link.click()

    # Every handle other than the listing window is a candidate detail window;
    # the last one reported is used.
    detail_handles = [handle for handle in browser.window_handles if handle != tmall]
    if not detail_handles:
        # The click opened no new window; there is nothing to switch to or close.
        return
    browser.switch_to.window(detail_handles[-1])

    try:
        for _ in range(max(1, max_attempts)):
            try:
                _print_product_details()
            except TimeoutException:
                # A field did not show up in time; try again up to the limit.
                continue
            break
    finally:
        # Always close the detail window and return to the listing window so
        # that a timeout does not leave stray windows behind.
        browser.close()
        browser.switch_to.window(tmall)


def _print_product_details():
    """Print the four detail fields of the product page in the current window.

    All fields are located first and printed afterwards, so a failed attempt
    produces no partial output.

    Raises:
        TimeoutException: If any field does not become clickable within the
            timeout of the shared ``wait`` object.
    """
    field_selectors = (
        '#J_DetailMeta > div.tm-clear > div.tb-property > div > div.tb-detail-hd > h1',
        '#J_PromoPrice > dd > div > span',
        '#J_DetailMeta > div.tm-clear > div.tb-property > div > ul > li.tm-ind-item.tm-ind-sellCount > div > span.tm-count',
        '#J_ItemRates > div > span.tm-count',
    )
    texts = [
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, css))).text
        for css in field_selectors
    ]
    for text in texts:
        print(text)
# Visit a fixed slice of the product list (indices 18..21 as passed to
# list_click) and report how long the whole run took since `s` was recorded.
try:
    for i in range(18, 22):
        list_click(i)
    e = time.time()
    print("用时{}".format(e-s))
finally:
    # End the WebDriver session even if a step above raised, so the browser
    # and driver processes are not left running.
    browser.quit()
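# Trailing text from the web page this script was copied from (not code):
#   "爬虫天猫超市失败" -- "Scraping Tmall Supermarket failed"
#   "最新推荐文章于 2024-08-08 22:15:00 发布" -- "Latest recommended article published at 2024-08-08 22:15:00"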