"""Scrape Taobao search-result listings with Selenium and PyQuery.

The script opens the Taobao home page, submits a search, reads the total
page count from the result pager, then walks the result pages and prints
one dict per listed item.
"""
import re

from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# CSS selectors used on the search-result page.
_PAGER = '#mainsrp-pager > div > div > div'
_PAGER_TOTAL = _PAGER + ' > div.total'
_PAGER_INPUT = _PAGER + ' > div.form > input'
_PAGER_SUBMIT = _PAGER + ' > div.form > span.btn.J_Submit'
_PAGER_ACTIVE = _PAGER + ' > ul > li.item.active > span'
_ITEMS = '#mainsrp-itemlist .items .item'

# First run of digits in the pager's "total" text.
_PAGE_TOTAL_RE = re.compile(r'(\d+)')

# The driver is created at import time; every function below shares it.
options = webdriver.ChromeOptions()
options.binary_location = "E:/Google/Chrome/Application/chrome.exe"
# `options=` replaces the deprecated `chrome_options=` keyword.
browser = webdriver.Chrome(options=options)
wait = WebDriverWait(browser, 10)


def get_page_total(keyword='男装'):
    """Run a search on the Taobao home page and return the page count.

    Args:
        keyword: Text typed into the search box. Defaults to the keyword
            the script originally hard-coded.

    Returns:
        The first integer found in the pager's "total" element, or None
        if any step fails (a failure message is printed in that case).
    """
    try:
        browser.get('http://www.taobao.com')
        search_box = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#q')))
        button = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "#J_TSearchForm > div.search-button > button")))
        search_box.send_keys(keyword)
        button.click()
        # The result page loads after the click, so wait for the pager
        # instead of looking it up immediately.
        total = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, _PAGER_TOTAL)))
        # A missing match raises AttributeError, handled below like any
        # other failure.
        return int(_PAGE_TOTAL_RE.search(total.text).group(1))
    except Exception:
        # Best-effort boundary: report the failure and let the caller decide.
        print("获取页面总数失败!!")
        return None


def next_page():
    """Jump to the page after the currently active one.

    Reads the active page number from the pager, types that number plus
    one into the pager's input box and clicks the submit button, then
    waits until the pager shows the new number as active.

    Returns:
        True if the pager reports the new page, False if any step fails
        (a failure message is printed in that case).
    """
    try:
        page_input = browser.find_element(By.CSS_SELECTOR, _PAGER_INPUT)
        submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, _PAGER_SUBMIT)))
        current = int(browser.find_element(By.CSS_SELECTOR, _PAGER_ACTIVE).text)
        target = str(current + 1)
        page_input.clear()
        page_input.send_keys(target)
        submit.click()
        # Without this wait a following get_info() could parse the old page,
        # whose items are still present in the DOM.
        wait.until(EC.text_to_be_present_in_element(
            (By.CSS_SELECTOR, _PAGER_ACTIVE), target))
        return True
    except Exception:
        print('页面跳转失败!!')
        return False


def get_info():
    """Parse the current result page and print one dict per item.

    Each dict has the keys 'image', 'price', 'deal', 'title' and 'shop'.
    'deal' is the deal-count text with its last three characters removed
    (presumably a fixed textual suffix -- confirm against the live page).

    Returns:
        The list of dicts that were printed.
    """
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, _ITEMS)))
    doc = pq(browser.page_source)
    results = []
    for item in doc(_ITEMS).items():
        picture = item.find('.pic .img')
        result = {
            'image': picture.attr('data-src'),
            'price': item.find('.price').text(),
            'deal': item.find('.deal-cnt').text()[:-3],
            'title': picture.attr('alt'),
            'shop': item.find('.location').text(),
        }
        print(result)
        results.append(result)
    return results


def main():
    """Search once, scrape every result page, and shut the browser down."""
    try:
        total = get_page_total()
        if total is None:
            # get_page_total() already printed the failure message.
            return
        get_info()
        # Page 1 is already displayed, so total - 1 jumps remain.
        for _ in range(1, total):
            if not next_page():
                break
            get_info()
    except Exception as e:
        print(e)
    finally:
        # quit() ends the whole driver session; close() only closes the
        # current window.
        browser.quit()


if __name__ == '__main__':
    main()
Python爬虫:Selenium爬取淘宝美食信息
最新推荐文章于 2020-03-05 22:51:51 发布