import csv
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
# Shared Chrome browser session; the functions in this script read it as the
# module-level name `dr`. It is created as soon as the module is executed.
dr = webdriver.Chrome()
# Alternative driver (PhantomJS), currently disabled:
# dr = webdriver.PhantomJS()
def check_login(times):
    """
    Poll the page until the logged-in user name shows up.

    Looks up the element with class ``site-nav-user`` on the shared driver
    ``dr`` about once per second and stops as soon as its text is non-empty.

    :param times: timeout in seconds; one check per second. Attempts are
        numbered from 0 up to and including ``times``.
    :return: True once a non-empty user name was read, False when every
        attempt failed (earlier versions returned None in both cases).
    """
    n = 0
    while n <= times:
        try:
            # find_element(by, value) is available on both old and current
            # Selenium releases; the find_element_by_* helpers were removed
            # in Selenium 4.3. "class name" is the By.CLASS_NAME strategy.
            user_name = dr.find_element("class name", "site-nav-user").text
        except NoSuchElementException as msg:
            print("第%d次找不到元素" %n,msg)
        else:
            if user_name != "":
                print("恭喜用户:%s 登录成功" %str(user_name).strip())
                return True
        # Element missing or still empty: wait a second and try again.
        time.sleep(1)
        n += 1
    return False
# Common XPath prefix of one result card inside the search-result list.
_ITEM_ROOT = "//div[@id='mainsrp-itemlist']//div[@class='items']/div/div[2]"


def _item_texts(relative_xpath):
    """Return the text of every element matching ``_ITEM_ROOT + relative_xpath``."""
    # "xpath" is the By.XPATH strategy; find_elements(by, value) works on
    # both old and current Selenium (find_elements_by_xpath was removed in 4.3).
    return [el.text for el in dr.find_elements("xpath", _ITEM_ROOT + relative_xpath)]


def write_txt(key,pageNums):
    """
    Scrape the search-result pages currently shown in ``dr`` into a CSV file.

    The file is written to ``E://商品信息_<key>.csv``. For each page the
    price, buyer count, shop name, shipping origin and title of every item
    are collected and written as one row of alternating label/value cells.
    After every page except the last, the "next page" control is clicked.

    :param key: search keyword; used in the file name and as the category
        column of every row
    :param pageNums: maximum number of result pages to scrape
    :return: None
    """
    # utf-8-sig: titles may contain characters the platform default codec
    # cannot encode, and the BOM lets spreadsheet tools detect UTF-8.
    # newline="" is what the csv module requires for files it writes to.
    with open("E://商品信息_%s.csv" % key, "w", encoding="utf-8-sig", newline="") as a:
        # csv.writer quotes cells that contain commas/quotes/newlines, which
        # hand-joined rows silently corrupted.
        writer = csv.writer(a)
        for num in range(1, pageNums + 1):
            # Give the page a moment to render before reading it.
            time.sleep(1)
            prices = _item_texts("/div[1]/div[1]/strong")      # item price
            buyers = _item_texts("/div[1]/div[2]")             # number of buyers
            shops = _item_texts("/div[3]/div[1]/a/span[2]")    # shop name
            origins = _item_texts("/div[3]/div[2]")            # shipping origin
            titles = _item_texts("/div[2]/a")                  # item title
            # zip stops at the shortest column, so a page where one column
            # has fewer matches no longer aborts the run with IndexError.
            for title, shop, price, paid, origin in zip(titles, shops, prices, buyers, origins):
                row = [
                    "类别名称:", key,
                    "商品标题:", title.strip(),
                    "店铺名称:", shop,
                    "商品价格:", price,
                    "付款人数:", paid,
                    "商品发货地:", origin,
                ]
                print(",".join(row) + "\n")
                writer.writerow(row)
            if num >= pageNums:
                print("当前为第%d页数据"%pageNums)
            else:
                # Move on to the next result page.
                print("正在第%d次点击下一页" %num)
                dr.find_element("xpath", "//ul[@class='items']/li[3]").click()
def go():
    """
    Run the whole crawl: open Taobao, wait for login, search each keyword.

    Opens the Taobao home page, clicks the login link and then calls
    ``check_login(1000)`` to wait for the login to finish. Keywords are read
    one per line from ``E://Keywords.txt``; each one is searched and at most
    the first 10 result pages are handed to ``write_txt``.

    :return: None
    """
    dr.get("https://www.taobao.com/")
    dr.maximize_window()
    time.sleep(1)
    # find_element(by, value) works on old and current Selenium alike; the
    # find_element_by_* helpers were removed in Selenium 4.3.
    dr.find_element("xpath", "//a[text()='亲,请登录']").click()
    time.sleep(3)
    # Wait for the login to complete before searching.
    check_login(1000)
    # Read all keywords up front so the file is not held open during the crawl.
    with open("E://Keywords.txt") as readKey:
        read_list = readKey.readlines()
    for key in read_list:
        # Strip all surrounding whitespace (the value also ends up in the
        # output file name) and skip blank lines instead of searching for "".
        val = key.strip()
        if not val:
            continue
        print("当前关键字:",key)
        search_box = dr.find_element("id", "q")
        # Remove whatever the previous search left in the box; otherwise the
        # new keyword is appended to the old one.
        search_box.clear()
        search_box.send_keys(val)
        time.sleep(1)
        dr.find_element("xpath", "//button[@type='submit' and text()='搜索']").click()
        time.sleep(1)
        # Pager text is split on "/" and the second part is used as the page
        # count — presumably "current/total"; verify against the live page.
        get_page_num = dr.find_element("xpath", "//ul[@class='items']/li[2]").text
        page_num = str(get_page_num).split("/")[1]
        print("当前页数:",page_num)
        total_pages = int(page_num)
        if total_pages > 10:
            # Cap the crawl at 10 pages per keyword.
            print("当前页数大于10")
            write_txt(val,10)
        else:
            print("当前页数小于10")
            write_txt(val,total_pages)
# Script entry: run the crawl, then always shut the browser down — even when
# go() raises part-way through — so no Chrome/driver process is left behind.
try:
    go()
    # Leave the final page visible for a moment before closing.
    time.sleep(3)
finally:
    dr.quit()
# 爬取淘宝商品信息demo
# 最新推荐文章于 2024-08-09 10:04:21 发布