# -*- coding: utf-8 -*-
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
import time
from lxml import etree
from pymongo import MongoClient
# Shared Chrome session used by every function in this script.
# The driver path is a raw string: it contains Windows backslashes which, in a
# normal string literal, are parsed as (invalid) escape sequences and trigger
# DeprecationWarning/SyntaxWarning on recent Python versions.
browser = webdriver.Chrome(executable_path=r'C:\My Tool\Anaconda3\envs\learn\Scripts\chromedriver.exe')
# Explicit-wait helper bound to the shared browser with a timeout of 10.
wait = WebDriverWait(browser, 10)
browser.maximize_window()
def get_response(page):
    """Open one JD search-result page, scroll through it, then scrape it.

    ``page`` is converted with ``str`` and appended to the search URL as the
    value of its ``page`` query parameter. After the page is loaded the window
    is scrolled step by step and ``get_html`` is called to extract and store
    the entries.
    """
    base_url = 'https://search.jd.com/Search?keyword=华硕电脑&enc=utf-8&page='
    browser.get(base_url + str(page))
    # Eight scroll positions, 1500 pixels apart (0 .. 10500), with a one second
    # pause after each — presumably to let lazily loaded entries render before
    # the page source is read; confirm against the site's behaviour.
    for offset in range(0, 8 * 1500, 1500):
        browser.execute_script('window.scrollTo(0,%s)' % offset)
        time.sleep(1)
    get_html()
def get_html():
    """Scrape every product entry from the page currently loaded in ``browser``.

    Parses ``browser.page_source`` with lxml, iterates over each ``<li>``
    below the element with id ``J_goodsList``, extracts the fields with XPath
    and passes one dict per entry to ``save_Mongo_DB``.
    """
    html = etree.HTML(browser.page_source)
    for item in html.xpath('//*[@id="J_goodsList"]/ul/li'):
        # Every expression is anchored on the current <li> with a leading ".".
        # An absolute "//..." path is evaluated from the document root, so it
        # would yield the same text for every entry instead of this entry's.
        data = {
            "title": item.xpath('string(.//div[3]/a/em)'),
            # NOTE(review): the key name "place" may be a typo for "price";
            # confirm before renaming, since stored documents use this key.
            "place": item.xpath('string(.//div[2]/strong/i)'),
            "shop": item.xpath('string(.//div[5]/span/a)'),
            "jpg": item.xpath('string(./div/div[1]/a/img/@src)'),
            "nums": item.xpath('string(./div/div[4]/strong/a)'),
        }
        save_Mongo_DB(data)
# Lazily created MongoDB client shared by all save_Mongo_DB calls.
_MONGO_CLIENT = None


def _get_jd_collection():
    """Return the ``jd`` collection of the ``JD`` database on localhost."""
    global _MONGO_CLIENT
    if _MONGO_CLIENT is None:
        # Create the client once; building a new MongoClient per insert opens
        # a fresh connection pool every time and never closes it.
        _MONGO_CLIENT = MongoClient("localhost")
    return _MONGO_CLIENT['JD']["jd"]


def save_Mongo_DB(data):
    """Insert one scraped entry into MongoDB (database ``JD``, collection ``jd``).

    ``data`` is copied with ``dict`` before insertion, so the caller's mapping
    is not modified by the driver.
    """
    # insert_one replaces Collection.insert, which was deprecated in
    # PyMongo 3 and removed in PyMongo 4.
    _get_jd_collection().insert_one(dict(data))
if __name__ == "__main__":
    try:
        # Visit the page values 0, 3, 6, ..., 21 of the search results.
        for page_number in range(0, 22, 3):
            get_response(page_number)
    finally:
        # Always end the browser session, even if a page fails, so no Chrome
        # or chromedriver process is left running.
        browser.quit()