from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
from random import randint
from pyquery import PyQuery as pq
import pymongo
class JD(object):
    """Scrape JD.com search results with a Selenium-driven headless Chrome.

    ``JD().execute()`` searches for ``self.keyword`` from the home page,
    walks the result pages, and appends every product both to a text file
    (``csv_path``) and to a MongoDB collection.
    """

    # Results page loaded by getData() when neither skip() nor an earlier
    # getData() call has brought the browser to a results page yet.
    DEFAULT_SEARCH_URL = "https://search.jd.com/Search?keyword=iphone&enc=utf-8&suggest=1.def.0.V16--12s0,20s0,38s0,97s0&wq=ip&pvid=652942ef6a96487c830988faa7a2d8e6"

    # Seconds to wait after scroll(): the injected script scrolls
    # asynchronously via setTimeout, so the page needs time to reach the
    # bottom before its source is read.
    SCROLL_SETTLE_SECONDS = 5

    # Output file; one ``str(dict)`` line is appended per product.
    csv_path = r"G:\个人总结\csv\jd_iphone.csv"

    # MongoDB connection settings used by save_to_mongo().
    MONGO_URL = "localhost"
    MONGO_DB = "JingDong"
    MONGO_COLLECTION = "products"

    # Per-instance state; these class-level values are only the defaults.
    _on_results_page = False  # True once the browser shows search results
    _mongo_client = None      # created on first save_to_mongo() call

    def __init__(self):
        """Start a headless Chrome session and set the search defaults."""
        self.options = webdriver.ChromeOptions()
        # Run Chrome without a visible window (the flag passed is
        # "headless"; the original comment called this "incognito mode").
        self.options.add_argument("headless")
        # NOTE(review): `executable_path` / `chrome_options` look like legacy
        # Selenium keyword names -- confirm against the installed version.
        self.browser = webdriver.Chrome(
            executable_path=r"C:\Users\dell\AppData\Local\Google\Chrome\Application\chromedriver.exe",
            chrome_options=self.options
        )
        # Maximize the browser window.
        self.browser.maximize_window()
        self.keyword = "iphone"
        self.url = "https://www.jd.com/?cu=true&utm_source=baidu-search&utm_medium=cpc&utm_campaign=t_262767352_baidusearch&utm_term=106807362512_0_1ea216375c8242409e3b4487043f782b"

    def scroll(self) -> None:
        """Inject a script that scrolls the page to the bottom step by step.

        The script runs asynchronously in the browser (it starts after one
        second and moves 100px every 50ms), so this method returns before the
        scrolling has finished. When done, the script appends "scroll-done"
        to the document title.
        """
        self.browser.execute_script("""
(function () {
var y = document.body.scrollTop;
var step = 100;
window.scroll(0, y);
function f() {
if (y < document.body.scrollHeight) {
y += step;
window.scroll(0, y);
setTimeout(f, 50);
}
else {
window.scroll(0, y);
document.title += "scroll-done";
}
}
setTimeout(f, 1000);
})();
""")

    def skip(self) -> int:
        """Search for ``self.keyword`` from the home page.

        Loads ``self.url``, types the keyword into the search box, submits,
        scrolls the results page and reads the total page count from the
        pager.

        :return: total number of result pages
        :raises ValueError: if the page count cannot be read from the page
        """
        self.browser.get(self.url)
        wait = WebDriverWait(self.browser, 5)
        # Wait for the search box to be present.
        search_box = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#key')))
        # Wait for the search button to become clickable.
        submit = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".button")))
        time.sleep(2)
        # Clear any pre-filled keyword, then type ours.
        search_box.clear()
        search_box.send_keys(self.keyword)
        time.sleep(2)
        submit.click()
        # From here on getData() must parse the current page instead of
        # navigating away from it.
        self._on_results_page = True
        time.sleep(randint(1, 3))
        # Scroll down and give the asynchronous scroll time to finish.
        self.scroll()
        time.sleep(self.SCROLL_SETTLE_SECONDS)
        doc = pq(self.browser.page_source)
        # Total page count as shown in the pager.
        return self._parse_page_count(doc(".p-skip b").text())

    @staticmethod
    def _parse_page_count(text) -> int:
        """Return the first whitespace-separated token of *text* as an int.

        :raises ValueError: if *text* is empty or its first token is not an
            integer
        """
        tokens = (text or "").split()
        if not tokens:
            raise ValueError("could not read the total page count from the results page")
        return int(tokens[0])

    @staticmethod
    def _absolute_url(href) -> str:
        """Turn an ``href`` value into an absolute https URL.

        Missing values become ``""``; values that already carry an http(s)
        scheme are returned unchanged; anything else gets ``https:``
        prepended (e.g. ``//host/path`` -> ``https://host/path``).
        """
        if not href:
            return ""
        if href.startswith(("http://", "https://")):
            return href
        return "https:" + href

    def _extract_product(self, item) -> dict:
        """Build the product record for one ``.gl-item`` element."""
        image_link = item(".p-img a")
        shop_link = item(".p-shop a")
        # Shop name followed by its URL; either part may be missing.
        shop = (shop_link.text() + " " + self._absolute_url(shop_link.attr("href"))).strip()
        return {
            "href": self._absolute_url(image_link.attr("href")),
            "title": (image_link.attr("title") or "").replace("\n", " "),
            "price": item(".p-price").text(),
            "name": str(item(".p-name em").text()).replace("\n", " "),
            "commit": item(".p-commit").text(),
            "shop": shop,
            "icons": item(".p-icons").text(),
            "stock": item(".p-stock").text()
        }

    def getData(self) -> None:
        """Parse every product on the current results page and store it.

        The page the browser is currently showing is parsed, so that pages
        reached through next() are actually scraped. Only when no results
        page has been reached yet is ``DEFAULT_SEARCH_URL`` loaded first.
        Each product is printed, appended to the text file, and inserted
        into MongoDB.
        """
        if not self._on_results_page:
            self.browser.get(self.DEFAULT_SEARCH_URL)
            self._on_results_page = True
        doc = pq(self.browser.page_source)
        for index, item in enumerate(doc(".gl-item").items()):
            product = self._extract_product(item)
            print(index, product)
            # Write the file first: insert_one() adds an "_id" key to the dict.
            self.write(product)
            self.save_to_mongo(product)

    def next(self) -> None:
        """Click the "next page" button and scroll the new page to the bottom."""
        wait = WebDriverWait(self.browser, 3)
        next_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".pn-next")))
        time.sleep(3)
        next_button.click()
        print(self.browser.current_url)
        time.sleep(2)
        self.scroll()
        # Same settle time as skip(): scroll() returns before it is done.
        time.sleep(self.SCROLL_SETTLE_SECONDS)

    def write(self, content) -> None:
        """Append ``str(content)`` as one line to ``self.csv_path``."""
        with open(self.csv_path, 'a+', encoding="utf-8") as file:
            file.write(str(content) + "\n")

    def save_to_mongo(self, result) -> None:
        """Insert *result* into the configured MongoDB collection.

        The client is created on first use and reused afterwards. Failures
        are reported on stdout and do not interrupt the scrape.
        """
        if self._mongo_client is None:
            self._mongo_client = pymongo.MongoClient(self.MONGO_URL)
        collection = self._mongo_client[self.MONGO_DB][self.MONGO_COLLECTION]
        try:
            collection.insert_one(result)
        except Exception as exc:
            print("存储到MongoDB失败", exc)

    def close(self) -> None:
        """Close the MongoDB client (if one was created) and quit the browser."""
        client, self._mongo_client = self._mongo_client, None
        try:
            if client is not None:
                client.close()
        finally:
            self.browser.quit()

    def execute(self) -> None:
        """Run the whole scrape: search, then walk and store every page.

        The browser and the MongoDB client are released when the run ends,
        whether it succeeds or raises.
        """
        try:
            count = self.skip()
            for i in range(count):
                print("第{}页".format(i + 1))
                self.getData()
                # There is no further page to move to after the last one.
                if i < count - 1:
                    self.next()
        finally:
            self.close()
if __name__ == "__main__":
    # Script entry point: build the scraper and run the full crawl.
    scraper = JD()
    scraper.execute()
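# python 爬京东商品信息 (Python: scraping JD.com product information)
# 最新推荐文章于 2024-09-30 09:30:48 发布 (blog-page footer: "latest recommended article published 2024-09-30 09:30:48")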