from selenium import webdriver
from bs4 import BeautifulSoup
import lxml
import re
import time
# Search Taobao for "python", then walk every result page and print one
# dict per product (name, price, deal count, shop, location).
driver = webdriver.Chrome()
try:
    driver.get('https://www.taobao.com/')
    q = driver.find_element_by_id('q')
    q.send_keys('python')
    driver.find_element_by_class_name('search-button').click()

    # The first run of digits in the "total" element's text is used as the
    # page count. Raw string so that \d is a regex class, not a string escape.
    total_text = driver.find_element_by_class_name('total').text
    total_page = re.search(r'(\d+)', total_text).group(1)
    print(total_page)

    last_page = int(total_page)
    page = 1  # the search lands on page 1, so parse it before any jump
    while page <= last_page:
        html = driver.page_source
        soup = BeautifulSoup(html, 'lxml')
        items = soup.select('#mainsrp-itemlist .items .item')
        for item in items:
            product = {
                'name': item.select('.title')[0].get_text().strip(),
                'price': item.select('.g_price-highlight > strong')[0].get_text(),
                'deal-cnt': item.select('.deal-cnt')[0].get_text(),
                'shop': item.select('.shop')[0].get_text().strip(),
                'location': item.select('.location')[0].get_text(),
            }
            print(product)

        # Advance only after the current page has been parsed; this way each
        # page is visited exactly once and the last page is not skipped.
        page += 1
        if page > last_page:
            break

        # Type the next page number into the pager's input and submit it.
        blank = driver.find_element_by_css_selector('#mainsrp-pager > div > div > div > div.form > input')
        blank.clear()
        blank.send_keys(str(page))
        jump = driver.find_element_by_css_selector('#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit')
        jump.click()
        # Fixed pause to give the next result page time to load before
        # page_source is read again.
        time.sleep(3)
finally:
    # Always shut the browser down, even if a lookup or parse step raised.
    driver.quit()
# Scraping Taobao product data with Python
# (Source page note: latest recommended article published 2024-08-09 10:04:21)