# 爬取腾讯招聘岗位信息 (scrape job postings from the Tencent recruitment site)

from lxml import etree
from selenium import webdriver
import csv
import time

class Job:
    """Scrape Tencent job postings through Chrome and store them as CSV rows.

    Call ``Add_URL`` once per listing page to collect detail-page links in
    ``self.URL``, then ``save_information`` for each link to write one row.
    Call ``close()`` (or use the instance as a context manager) when done so
    the browser is shut down.
    """

    # Listing-page URL; ``{}`` receives the value of the ``start`` parameter.
    _LIST_URL = 'https://hr.tencent.com/position.php?keywords=&tid=87&lid=2268&start={}#a'

    def __init__(self,f,fieldname):
        """Start Chrome and wrap ``f`` in a ``csv.DictWriter``.

        Args:
            f: Writable text file object that receives the CSV rows. It is
                not closed by this class.
            fieldname: Column names for the CSV; must include every key
                written by ``save_information``.
        """
        self.browser = webdriver.Chrome()
        self.f = f
        self.fieldname = fieldname
        self.writer = csv.DictWriter(self.f,fieldnames=self.fieldname)
        # Only emit the header into an empty file: when ``f`` was opened in
        # append mode on a file that already has rows, writing it again would
        # put a second header line in the middle of the data.
        if self._at_file_start(self.f):
            self.writer.writeheader()
        self.URL = []

    def __enter__(self):
        """Return ``self`` so the instance can be used in a ``with`` block."""
        return self

    def __exit__(self,exc_type,exc_value,traceback):
        """Quit the browser on leaving the ``with`` block; never suppresses errors."""
        self.close()

    def close(self):
        """Quit the browser started in ``__init__``."""
        self.browser.quit()

    @staticmethod
    def _at_file_start(f):
        """Return True if ``f`` reports position 0, or cannot report a position.

        Streams without a usable ``tell()`` fall back to True so the header
        is still written for them.
        """
        try:
            return f.tell() == 0
        except (AttributeError, OSError, ValueError):
            return True

    @staticmethod
    def _join_first_texts(items):
        """Concatenate the first direct text node of each element in ``items``.

        Elements whose ``xpath('text()')`` result is empty are skipped
        instead of raising ``IndexError``.
        """
        first_texts = []
        for item in items:
            texts = item.xpath('text()')
            if texts:
                first_texts.append(texts[0])
        return ''.join(first_texts)

    def Add_URL(self,index):
        """Load one listing page and append its detail links to ``self.URL``.

        Args:
            index: Value substituted into the ``start`` query parameter.

        Raises:
            IndexError: If the page contains no ``table`` with class
                ``tablelist``.
        """
        self.browser.get(self._LIST_URL.format(index))
        html = etree.HTML(self.browser.page_source)
        table = html.xpath('//table[@class="tablelist"]')[0]
        for td in table.xpath('.//td[@class="l square"]'):
            hrefs = td.xpath('./a/@href')
            # A cell without a link has nothing to scrape; skip it.
            if hrefs:
                self.URL.append(hrefs[0])

    def save_information(self,url):
        """Load one posting's detail page and write it as a CSV row.

        The detail table is read row by row: row 0 holds the title, row 1
        the location / category / headcount cells, row 2 the duties list and
        row 3 the requirements list.

        Args:
            url: Absolute URL of the detail page.

        Raises:
            IndexError: If the expected table, rows, cells or lists are
                missing from the page.
        """
        self.browser.get(url)
        html = etree.HTML(self.browser.page_source)
        table = html.xpath('//table[@class="tablelist textl"]')[0]
        trs = table.xpath('.//tr')
        title = trs[0].xpath('./td/text()')[0]
        tds = trs[1].xpath('.//td')
        place = tds[0].xpath('text()')[0]
        category = tds[1].xpath('text()')[0]
        number_of_people = tds[2].xpath('text()')[0]
        duty_items = trs[2].xpath('.//ul')[0].xpath('.//li')
        duty_of_work = self._join_first_texts(duty_items)
        requirement_items = trs[3].xpath('.//ul')[0].xpath('.//li')
        job_requirement = self._join_first_texts(requirement_items)
        self.writer.writerow({'岗位':title,'工作地点':place,'职位类别':category,'招聘人数':number_of_people,'工作职责':duty_of_work,'工作要求':job_requirement})

# Script entry point: collect detail links from four listing pages, then
# scrape every linked posting into 腾讯岗位.csv.
if __name__ == '__main__':
    base_url = 'https://hr.tencent.com/'
    fieldname = ['岗位','工作地点','职位类别','招聘人数','工作职责','工作要求']
    # newline='' is what the csv module expects for files it writes to;
    # without it extra blank lines can appear between rows on Windows.
    # The with-block closes the file even if scraping raises.
    with open('腾讯岗位.csv','a',encoding='utf-8',newline='') as f:
        job = Job(f,fieldname)
        try:
            # start offsets 0, 10, 20, 30 (presumably 10 postings per page).
            for i in range(4):
                job.Add_URL(str(i*10))
            # The collected links are joined onto base_url before loading.
            for url in job.URL:
                job.save_information(base_url+url)
        finally:
            # Always shut the browser down so no Chrome process is left behind.
            job.browser.quit()
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值