使用 selenium 和 xpath 爬取 51job 的 Java 职位

最新推荐文章于 2022-10-16 10:42:13 发布

xtggbmdk

最新推荐文章于 2022-10-16 10:42:13 发布

阅读量425

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/xtggbmdk/article/details/104406837

版权

python 专栏收录该内容

52 篇文章 0 订阅

订阅专栏

1.代码:

import csv
import os
import time

from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
# --- Log in to 51job and run a "java" job search restricted to Xi'an --------
# Elements are located with find_element(By.XPATH, ...): the older
# find_element_by_xpath helpers were removed in Selenium 4.3, while the
# By-based form works on both old and new Selenium releases.
web = Chrome()
web.get("https://login.51job.com/login.php")

# NOTE(security): the account and password used to be hard-coded here. They
# can now be supplied through the JOB51_USERNAME / JOB51_PASSWORD environment
# variables. The literals are kept only as backward-compatible defaults and
# should be removed (and the password rotated) before this file is shared.
login_name = os.environ.get("JOB51_USERNAME", "13689297888")
login_password = os.environ.get("JOB51_PASSWORD", "zxz31194")

# Fill in the login form and submit it.
web.find_element(By.XPATH, '//*[@id="loginname"]').send_keys(login_name)
web.find_element(By.XPATH, '//*[@id="password"]').send_keys(login_password)
web.find_element(By.XPATH, '//*[@id="login_btn"]').click()

# Open the "job search" page and type the keyword.
web.find_element(By.XPATH, '//*[@id="topIndex"]/div/p/a[2]').click()
web.find_element(By.XPATH, '//*[@id="kwdselectid"]').send_keys("java")
time.sleep(2)

# Open the location picker (initially "nationwide"), tick Xi'an, confirm.
# The fixed sleeps give the picker time to render between clicks.
web.find_element(By.XPATH, '//*[@id="work_position_click"]/p').click()
time.sleep(2)
web.find_element(
    By.XPATH,
    '//*[@id="work_position_click_center_right_list_category_000000_200200"]',
).click()
time.sleep(2)
web.find_element(By.XPATH, '//*[@id="work_position_click_bottom_save"]').click()
time.sleep(2)

# Submit the search.
web.find_element(By.XPATH, '/html/body/div[2]/form/div/div[1]/button').click()

# NOTE(review): the original comment says this step clears the selected
# regions and switches back to nationwide, but the code only clicks the
# location input on the results page -- confirm the intended behaviour.
web.find_element(By.XPATH, '//*[@id="work_position_input"]').click()
time.sleep(1)

# --- Walk through the result pages and dump every job row to 51job.csv ------

# CSS classes of the cells in one result row, in output column order. The
# original script named them job name, company name, address, salary and
# "job_data" (t5 -- presumably the posting date; confirm against the page).
RESULT_CELL_CLASSES = ("t1", "t2", "t3", "t4", "t5")

# Last <li> of the pager, used as the "next page" control.
NEXT_PAGE_XPATH = '//*[@id="resultList"]/div[55]/div/div/div/ul/li[last()]'

# utf-8-sig writes a BOM first, so Excel detects UTF-8 when the file is
# double-clicked. Plain utf-8 shows up garbled there, and gbk cannot encode
# every character (it raises UnicodeEncodeError on the first one it lacks).
# newline="" is what the csv module expects for files it writes to.
with open("51job.csv", mode="w", encoding="utf-8-sig", newline="") as f:
    # csv.writer quotes fields containing commas, quotes or newlines; the
    # previous hand-built f-string produced broken rows for such values.
    writer = csv.writer(f)
    previous_page = None

    while True:
        result_rows = web.find_element(By.ID, "resultList").find_elements(
            By.CLASS_NAME, "el"
        )
        # Skip the header row, whose class attribute contains "title".
        page = [
            [row.find_element(By.CLASS_NAME, cell).text for cell in RESULT_CELL_CLASSES]
            for row in result_rows
            if "title" not in row.get_attribute("class")
        ]

        # If the page content is identical to the one just written, the
        # "next" click did not advance (last page reached): stop instead of
        # appending the same rows forever.
        if page == previous_page:
            break
        writer.writerows(page)
        previous_page = page

        try:
            web.find_element(By.XPATH, NEXT_PAGE_XPATH).click()
        except NoSuchElementException:
            # No pager at the expected position -> nothing more to fetch.
            break
        time.sleep(2)  # give the next page time to load

2.结果