# tao_bao.py-CSDN博客

# 本文链接：https://blog.csdn.net/weixin_42060598/article/details/83027561

from selenium import webdriver
from bs4 import BeautifulSoup
import time
import random
# Single Chrome WebDriver session, created as soon as this module is executed.
# The functions below reach it through the module-level name `driver`.
driver = webdriver.Chrome()
# Maximize the browser window before any page is loaded.
driver.maximize_window()
# Pagination stops once the page counter exceeds this value.
_MAX_PAGE = 100
# Number of times one page is loaded and parsed before the crawl gives up on it.
_MAX_RETRIES = 3


def _field_text(item, css_class):
    """Return the stripped text of the ``div`` with *css_class* inside *item*.

    Returns an empty string when the ``div`` is absent, so that one incomplete
    product card does not abort the whole page.
    """
    node = item.find('div', {'class': css_class})
    return node.get_text().strip() if node is not None else ''


def _parse_items(html):
    """Extract one dict per product from the HTML of a search-result page.

    :param html: page source of a search-result page
    :return: list of dicts with the keys ``price``, ``payment_number``,
        ``good_name``, ``shop`` and ``location``
    :raises ValueError: if the ``m-itemlist`` container is not in the page
    """
    soup = BeautifulSoup(html, "html.parser")
    container = soup.find('div', {'class': 'm-itemlist'})
    if container is None:
        raise ValueError("m-itemlist container not found in page")
    # Every product card is a div tagged data-category="auctions".
    items = container.find_all('div', {'data-category': 'auctions'})
    return [
        {
            'price': _field_text(item, 'price g_price g_price-highlight'),
            'payment_number': _field_text(item, 'deal-cnt'),
            'good_name': _field_text(item, 'row row-2 title'),
            'shop': _field_text(item, 'shop'),
            'location': _field_text(item, 'location'),
        }
        for item in items
    ]


def get_info(url, page):
    """Scrape one result page, append its products to phone_honor.txt, then continue.

    Each product is written as one comma-separated line:
    price, number of payments, product name, shop name, location.
    After the page has been written, ``next_page`` is called to move on, until
    the page counter exceeds ``_MAX_PAGE``.

    Loading and parsing is attempted at most ``_MAX_RETRIES`` times. If every
    attempt fails, the crawl stops instead of retrying forever.

    :param url: address of the search-result page to scrape
    :param page: 1-based number of this page (used for progress output and as
        the stop condition)
    :return: None; the results are only written to the file
    """
    print("正在获取第%d页信息" % page)

    infos = None
    for _ in range(_MAX_RETRIES):
        try:
            driver.get(url)
            driver.implicitly_wait(10)
            infos = _parse_items(driver.page_source)
            break
        except Exception as e:
            # Best-effort crawl: report the problem and try this page again.
            print("error:", e)
    if infos is None:
        print("error: 第%d页连续%d次获取失败，爬取终止" % (page, _MAX_RETRIES))
        return

    # Write only after the whole page parsed, so a retry never leaves
    # duplicated or partial rows behind.
    try:
        with open('./phone_honor.txt', 'a', encoding='utf-8') as f:
            for info in infos:
                f.write(",".join((
                    info['price'],
                    info['payment_number'],
                    info['good_name'],
                    info['shop'],
                    info['location'],
                )) + "\n")
                print("写入文件成功！")
    except OSError as e:
        print("error:", e)
        return

    page += 1
    if page > _MAX_PAGE:
        print("爬取任务结束！！！")
        return
    try:
        next_page(url, page)
    except Exception as e:
        # A navigation failure (e.g. no "next page" link) ends the crawl here
        # rather than re-scraping the current page.
        print("error:", e)
def next_page(url, page):
    """Open *url*, click its "next page" link and scrape the page that follows.

    :param url: address of the result page that has just been scraped
    :param page: number to report for the page that is scraped next
    :return: None
    """
    # Imported locally so this function does not depend on the file's import
    # block. find_element(By.XPATH, ...) replaces find_element_by_xpath, which
    # newer Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    driver.get(url)
    driver.implicitly_wait(10)
    driver.find_element(By.XPATH, '//a[@trace="srp_bottom_pagedown"]').click()
    # Pause for a random 0-10 seconds before requesting the next page.
    second = 10 * random.random()
    time.sleep(second)
    # get_info loads the URL itself, so the page is not reloaded here first.
    get_info(driver.current_url, page)
# Manual test entry point: search Taobao for a product and scrape the result pages.
if __name__ == '__main__':
    # find_element(By.ID / By.XPATH, ...) replaces the find_element_by_* helpers,
    # which newer Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    page = 1
    url = "https://www.taobao.com/"
    name = "荣耀10"
    try:
        # Drive the browser through a search for the product name.
        driver.get(url)
        driver.implicitly_wait(10)
        # Type the product name into the search box.
        search_box = driver.find_element(By.ID, "q")
        search_box.clear()
        search_box.send_keys(name)
        # Click the search button.
        driver.find_element(By.XPATH, '//*[@id="J_TSearchForm"]/div[1]/button').click()
        # Hand the result page that is now open to get_info.
        get_info(driver.current_url, page)
    finally:
        # Close the browser even if the crawl fails part-way through.
        driver.quit()