"""
Created by Young on 2019/1/23 10:25
"""
import pymongo
from selenium import webdriver
from lxml import etree
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
import re
# Job-search listing page on lagou.com (query string is percent-encoded).
url = 'https://www.lagou.com/jobs/list_python%E7%88%AC%E8%99%AB?oquery=python%E5%90%8E%E7%AB%AF&fromSearch=true&labelWords=relative'

# Chrome browser session and an explicit-wait helper bound to it;
# the wait gives up after 10 seconds.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, timeout=10)

# MongoDB handles: local server -> 'lagou' database -> 'lagou_job' collection.
# NOTE(review): the name `meishi_info` does not match the collection it
# holds; it is kept unchanged because other code may reference it.
client = pymongo.MongoClient(host='localhost', port=27017)
lagou = client['lagou']
meishi_info = lagou['lagou_job']
def job_link(url):
driver.get(url)
while True:
source = driver.page_source
time.sleep(2)
page_list(source)
next_btn = wait.until(EC.element_to_b
selenium爬取拉勾网数据并进行可视化分析
最新推荐文章于 2023-06-23 20:15:56 发布
本文使用 Selenium 库爬取拉勾网职位信息，包括职位名称、薪资、公司等，然后对数据进行预处理，并利用 matplotlib 和 seaborn 进行可视化分析，揭示互联网行业的薪资分布、热门职位等趋势。
摘要由CSDN通过智能技术生成