1、流程框架
搜索关键字
分析页码并翻页
分析提取商品内容
存储至MongoDB
2、spider.py
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
from pyquery import PyQuery as pq
from taobaomeishi.config import *
import pymongo
# MongoDB handles: open a client on MONGO_URL and pick the database named by
# MONGO_DB (both presumably supplied by the star import from
# taobaomeishi.config -- confirm against that module).
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]

# Module-level Chrome driver instance used by the functions below.
# Alternative driver kept for reference: browser = webdriver.PhantomJS()
browser = webdriver.Chrome()

# Explicit-wait helper bound to the driver above, with a 10-second timeout.
wait = WebDriverWait(browser, timeout=10)
def search():
try:
browser.get('https://www.taobao.com')
input= wait.until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#q"))
)
submit=WebDriverWait(browser,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#J_TSearchForm > div.search-button > button')))
inpu