Python爬虫 使用Selenium爬取腾讯招聘信息

使用 Selenium 爬取腾讯招聘信息,并保存为 Excel 文件

  • 代码比较简单,直接上源码
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from lxml import etree
import xlwt


class Tencent(object):
    """Scrape Tencent job postings with Selenium and save them to an Excel file."""

    def __init__(self, url):
        # url is a format template with one %s placeholder for the page index.
        self.url = url
        self.driver = webdriver.Chrome()
        self.data_list = []  # accumulated job dicts across all pages
        # Kick off the whole crawl immediately; callers rely on
        # Tencent(url) performing the full scrape as a side effect.
        self.main()

    def get_content_by_selenium(self, url):
        """Load *url* in the browser and return the rendered page source.

        Explicitly waits (up to 20s) for div[@class="correlation-degree"]
        so the JS-rendered job listings are present before returning.
        """
        self.driver.get(url)
        wait = WebDriverWait(self.driver, 20)
        wait.until(EC.presence_of_all_elements_located(
            (By.XPATH, '//div[@class="correlation-degree"]')))
        return self.driver.page_source

    def parse_div(self, div_list):
        """Extract one job record per listing <div>, appending to self.data_list.

        A listing missing an expected field raises IndexError on the [0]
        lookup and is skipped, rather than aborting the whole page.
        (The original caught bare Exception, which hid real bugs too.)
        """
        for div in div_list:
            try:
                data = {
                    '岗位名称': div.xpath('.//h4/text()')[0],
                    '工作地点': div.xpath('.//a/p/span[2]/text()')[0],
                    '工作类型': div.xpath('.//a/p/span[3]/text()')[0],
                    '发布时间': div.xpath('.//a/p/span[4]/text()')[0],
                    '职位描述': div.xpath('.//a/p[2]/text()')[0].replace('\n', ''),
                }
            except IndexError:
                continue
            print(data)
            self.data_list.append(data)

    # 写入excel
    def write_excel(self, filename, sheetname, data_list):
        """Write a list of job dicts to *filename* as a single-sheet .xls workbook.

        The first dict's keys become the header row; all dicts are assumed
        to share the same keys (parse_div guarantees this).
        """
        if not data_list:
            # Nothing scraped: writing would crash on data_list[0].
            print('没有数据可写入')
            return
        workbook = xlwt.Workbook(encoding='utf-8')
        sheet = workbook.add_sheet(sheetname)
        head = list(data_list[0].keys())
        # Header row.
        for col, title in enumerate(head):
            sheet.write(0, col, title)
        # One row per record, starting below the header.
        for row, item in enumerate(data_list, start=1):
            for col, key in enumerate(head):
                sheet.write(row, col, item[key])
        workbook.save(filename)
        print('写入成功')

    def main(self):
        """Crawl pages 1-10, parse listings, save to Excel, then close the browser.

        The try/finally guarantees the Chrome session is torn down even if a
        page fails to load (the original leaked the browser on any error, and
        redundantly called close() before quit()).
        """
        try:
            for page in range(1, 11):
                html_str = self.get_content_by_selenium(self.url % page)
                html = etree.HTML(html_str)
                div_list = html.xpath('//div[@class="recruit-wrap recruit-margin"]/div')
                self.parse_div(div_list)
            self.write_excel('tencent.xls', 'job', self.data_list)
        finally:
            # quit() ends the whole WebDriver session and closes all windows.
            self.driver.quit()

if __name__ == '__main__':
    # %s is filled with the page index (1-10) by Tencent.main().
    Tencent('https://careers.tencent.com/search.html?index=%s')
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值