腾讯招聘爬取

import requests
import re
from lxml import etree
from mysql import MysqlHelper
# Instantiate the project's MySQL wrapper class (imported from the local
# `mysql` module); tencent() uses it to write scraped rows.
mc = MysqlHelper()
# Parameterized INSERT statement for the `tencent` table. The four %s
# placeholders are filled, in order, with the (didian, zhiwei, people, work)
# tuple passed to mc.execute_modify_sql() in tencent().
sql = 'INSERT INTO tencent(didian,zhiwei,people,works) VALUE (%s,%s,%s,%s)'
def _first_text(node, path):
    """Return the first result of XPath *path* under *node*, or None if empty."""
    matches = node.xpath(path)
    return matches[0] if matches else None


def tencent(url, headers, pages=3, timeout=10):
    """Crawl Tencent job listings and store every posting through ``mc``.

    Each listing page is fetched, every ``<a target="_blank" href=...>`` link
    on it is followed, and each ``table`` with class ``"tablelist textl"`` on
    the detail page is parsed into a (location, position, head count, duties)
    tuple that is inserted with the module-level ``sql`` statement.

    Args:
        url: Listing URL template containing one ``{}`` placeholder that
            receives the ``start`` offset (0, 10, 20, ...).
        headers: HTTP headers sent with every request.
        pages: Number of listing pages to walk, in offsets of 10. The default
            of 3 reproduces the previously hard-coded ``range(0, 30, 10)``.
        timeout: Timeout in seconds passed to every ``requests.get`` call so
            a stalled connection cannot hang the crawl indefinitely.

    Raises:
        requests.exceptions.RequestException: If a request fails or times out.
    """
    for start in range(0, pages * 10, 10):
        listing = requests.get(url.format(start), headers=headers,
                               timeout=timeout)
        # The href values are captured with their surrounding quotes, which
        # are stripped below when the detail URL is built.
        hrefs = re.findall(r'<a target="_blank" href=(.*?)>', listing.text)
        for href in hrefs:
            detail_url = 'https://hr.tencent.com/' + href.replace('"', '')
            detail = requests.get(detail_url, headers=headers,
                                  timeout=timeout)
            tree = etree.HTML(detail.text)
            # NOTE(review): etree.HTML appears to return None for bodies with
            # no parsable markup -- skip such pages instead of crashing.
            if tree is None:
                continue
            for table in tree.xpath('//table[@class="tablelist textl"]'):
                # Work location
                didian = _first_text(
                    table, './tr[@class="c bottomline"]/td[1]/text()')
                # Position
                zhiwei = _first_text(
                    table, './tr[@class="c bottomline"]/td[2]/text()')
                # Head count
                people = _first_text(
                    table, './tr[@class="c bottomline"]/td[3]/text()')
                if didian is None or zhiwei is None or people is None:
                    # Not a complete posting table; the original code raised
                    # IndexError here and aborted the whole crawl.
                    continue
                # Duties: join the list items into plain text rather than
                # storing a Python list repr, which kept brackets and quotes
                # and also stripped commas that belong to the text itself.
                duty_items = table.xpath('./tr[@class="c"]/td/ul/li/text()')
                work = '\n'.join(
                    item.strip() for item in duty_items if item.strip())
                print(work)

                data = (didian, zhiwei, people, work)
                mc.execute_modify_sql(sql, data)
if __name__ == '__main__':
    # Browser-like User-Agent header sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    # Listing URL template; the {} placeholder is filled with the start
    # offset by tencent() via str.format.
    url = 'https://hr.tencent.com/position.php?&start={}'

    # Script entry point: crawl the listing pages and store the postings.
    tencent(url, headers)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值