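# Scrape Taobao food ("美食") listings.
# Workflow:
# 1. Search for a keyword: use Selenium to drive the browser and run the search, obtaining the resulting product list.
# 2. Get the number of result pages and simulate paging to obtain the product lists on the subsequent pages.
# 3. Parse and extract the product details: use pyquery to analyse the page source and extract the product list.
# 4. Store the results in MongoDB.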
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
from pyquery import PyQuery as pq
from config import *
import pymongo
# MongoDB handle used for storing scraped items. MONGO_URL and MONGO_DB are
# presumably supplied by the star-import from config -- verify against config.py.
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]

# Raw string literal: the previous plain literal contained "\G" and "\c",
# which are invalid escape sequences (a warning on current Python versions and
# slated to become a syntax error). The resulting path value is unchanged.
chrome_diver = r"E:\Googedownload\chromedriver_win32\chromedriver.exe"

# NOTE(review): `executable_path` is the Selenium 3 way of pointing at the
# driver binary; newer Selenium 4 releases expect a Service object instead --
# confirm against the installed Selenium version before upgrading.
browser = webdriver.Chrome(executable_path=chrome_diver)

# Shared explicit-wait helper bound to the browser, with a timeout of 10.
wait = WebDriverWait(browser, 10)
def search():#获取首页数据
try:
browser.get('https://www.taobao.com')
input=wait.until(
EC.presence_of_element_located((By.CSS_SELECTOR,'#q'))
)
su