# JD.com rice cooker search results page scraper (京东电饭锅页面爬虫)
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
# Walk through JD.com search result pages for rice cookers and print, for each
# listed product: name, product URL, price, the text of the '.p-commit' link,
# shop name and shop URL.


def _absolute_url(href):
    """Return *href* as an absolute URL.

    Scheme-relative links (``//item.jd.com/...``) get an ``https:`` prefix;
    links that already carry a scheme are returned unchanged, and an empty
    href yields an empty string.
    """
    if not href:
        return ''
    if href.startswith(('http://', 'https://')):
        return href
    return 'https:' + href


def _stripped_text(node):
    """Return the stripped text of a BeautifulSoup node, or '' if it is None."""
    return node.text.strip() if node is not None else ''


options = ChromeOptions()
# Do not load images; only text and links are read from the page.
options.add_argument('blink-settings=imagesEnabled=false')
# Exclude Chrome's "enable-automation" switch (presumably to make the session
# look less like an automated one -- confirm against the Chrome/Selenium docs).
options.add_experimental_option('excludeSwitches', ['enable-automation'])
b = Chrome(options=options)
try:
    b.get('https://search.jd.com/Search?keyword=%E7%94%B5%E9%A5%AD%E9%94%85&enc=utf-8&wq=%E7%94%B5%E9%A5%AD%E9%94%85&pvid=388bf700598a4288b08f0bd375c4f93e')
    time.sleep(1)
    for page in range(10):
        # Scroll down step by step, pausing between steps, before reading the
        # page source (presumably so lazily loaded products get rendered).
        for _ in range(10):
            b.execute_script('window.scrollBy(0, 800)')
            time.sleep(1)

        soup = BeautifulSoup(b.page_source, 'lxml')
        goods_div = soup.select('#J_goodsList .gl-i-wrap')
        for div in goods_div:
            name_a = div.select_one('.p-name a')
            if name_a is None:
                # Without the name link there is nothing useful to report.
                continue
            name = name_a.text
            goods_url = _absolute_url(name_a.attrs.get('href', ''))
            price = _stripped_text(div.select_one('.p-price i'))
            commit = _stripped_text(div.select_one('.p-commit a'))
            # Some entries may have no shop link; report empty fields then
            # instead of crashing on None.
            shop_a = div.select_one('.p-shop a')
            shop_name = shop_a.text if shop_a is not None else ''
            shop_url = _absolute_url(shop_a.attrs.get('href', '')) if shop_a is not None else ''
            print(name, goods_url, price, commit, shop_name, shop_url)
        print('---------------------1页数据解析完成----------------------')

        # find_elements returns an empty list (instead of raising) when the
        # "next page" button is absent, which ends the crawl cleanly.
        next_buttons = b.find_elements(By.CLASS_NAME, 'pn-next')
        if not next_buttons:
            break
        next_buttons[0].click()
        time.sleep(3)
    # Keep the browser open until the user presses Enter.
    input('end:')
finally:
    # Always shut the browser and driver down, even if scraping failed.
    b.quit()
# JD.com rice cooker product detail page scraper (京东电饭锅详情页爬虫)
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
import time
from bs4 import BeautifulSoup
# Open every product listed on the first JD.com search result page for rice
# cookers, click the second-to-last tab of the detail page's tab bar
# (presumably the comments tab, judging by the original variable name --
# confirm against the live page), and print the resulting page source.
options = ChromeOptions()
# Content-setting value 2 for images is the "block" setting, so images are
# not loaded.
options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
# Exclude Chrome's "enable-automation" switch (presumably to make the session
# look less like an automated one -- confirm against the Chrome/Selenium docs).
options.add_experimental_option('excludeSwitches', ['enable-automation'])
b = Chrome(options=options)
try:
    b.get('https://search.jd.com/Search?keyword=%E7%94%B5%E9%A5%AD%E9%94%85&enc=utf-8&wq=%E7%94%B5%E9%A5%AD%E9%94%85&pvid=388bf700598a4288b08f0bd375c4f93e')
    # Remember the search window explicitly: the order of window_handles is
    # not guaranteed, so indexing with [0] / [-1] is unreliable.
    search_window = b.current_window_handle
    all_goods_a = b.find_elements(By.CSS_SELECTOR, '#J_goodsList .gl-i-wrap>.p-img>a')
    for a in all_goods_a:
        a.click()
        time.sleep(1)
        # Detail tabs are closed after each product, so any handle other than
        # the search window belongs to the tab that was just opened.
        new_windows = [h for h in b.window_handles if h != search_window]
        if not new_windows:
            # No new tab appeared; closing the current window would close the
            # search page itself, so skip this product.
            continue
        b.switch_to.window(new_windows[-1])
        try:
            time.sleep(2)
            b.execute_script('window.scrollBy(0, 900)')
            time.sleep(1)
            tabs = b.find_elements(By.CSS_SELECTOR, '#detail>.tab-main>ul>li')
            # Only click when the second-to-last tab actually exists.
            if len(tabs) >= 2:
                tabs[-2].click()
                time.sleep(2)
            print(b.page_source)
            print('================================获取完一个商品===============================')
        finally:
            # Close the detail tab and return to the search page even if
            # something failed while handling this product.
            b.close()
            b.switch_to.window(search_window)
    # Keep the browser open until the user presses Enter.
    input('end:')
finally:
    # Always shut the browser and driver down, even if scraping failed.
    b.quit()