本文使用 Selenium 驱动谷歌浏览器(Chrome),爬取前程无忧(https://mkt.51job.com)网站上最近发布的招聘信息的前五页内容(以“数据分析师”职位为例),完整代码如下。
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By

# Search-result URL for the keyword "数据分析师" (data analyst). The keyword
# segment is percent-encoded twice ("%25E6..." decodes to "%E6...").
# NOTE(review): the trailing ",2,1.html" presumably carries the page number
# in its last field -- confirm against the paging loop further down.
# The "&degreefrom=99" parameter had been corrupted into "°reefrom=99"
# (the "&deg" prefix was rendered as an HTML entity); it is restored here.
url = (
    'https://search.51job.com/list/000000,000000,0000,00,9,99,'
    '%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590%25E5%25B8%2588,2,1.html'
    '?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99'
    '&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='
)

# Start a Chrome session and load the first page of search results.
driver = webdriver.Chrome()
driver.get(url)

# Explicit-wait helper bound to this driver with a 20-second timeout.
wait = WebDriverWait(driver, 20)

# Accumulators, both starting empty.
# NOTE(review): presumably filled by the scraping loop that follows (scraped
# rows and per-job detail-page links) -- the loop is outside this view.
data = []
details_links = []
for