获取腾讯社会招聘的职位信息

import requests
from lxml import etree
import pymysql

class mysql_conn():
    """Small wrapper around a single PyMySQL connection and cursor.

    The connection targets the MySQL server at 127.0.0.1 as user ``root``
    (password ``root``) and selects the ``wang`` database.

    Attributes are documented here rather than inline:
      db     -- the open PyMySQL connection, or None once it has been closed.
      cursor -- a cursor created from ``db``; reused for every statement.
    """

    def __init__(self):
        # PyMySQL 1.0 removed positional arguments from connect(); naming
        # each argument works on both old and new releases.
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='root',
            database='wang',
        )
        self.cursor = self.db.cursor()

    def execute_modify_mysql(self, sql, data):
        """Run one data-modifying statement and commit it.

        Args:
            sql: statement text containing ``%s`` placeholders.
            data: sequence of values bound to the placeholders by the driver.

        Raises:
            Whatever the driver raises on execute/commit; the transaction is
            rolled back first so the connection is left in a clean state.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Undo the partial transaction, then let the caller see the error.
            self.db.rollback()
            raise

    def __del__(self):
        # __init__ may have failed before self.db was assigned, and __del__
        # can run after an explicit call, so tolerate both situations.
        db = getattr(self, 'db', None)
        if db is None:
            return
        self.db = None
        db.close()

# Scrape the first three listing pages of hr.tencent.com, follow every
# posting to its detail page, and store one row per posting in `tengxun`.

# Parameterised INSERT; the six values are bound by the database driver.
sql = 'insert into tengxun(zhiwei,place,leibie,renshu,zhize,yaoqiu) values(%s,%s,%s,%s,%s,%s)'
sq = mysql_conn()

# Request settings are loop-invariant, so build them once.
t_url = 'https://hr.tencent.com/'
list_url = 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start={}#a'
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}
# Seconds to wait for the server; without a timeout requests can block forever.
request_timeout = 10


def _fetch_tree(url):
    """Download *url* and return the body parsed as an lxml HTML tree.

    Raises requests.HTTPError on a 4xx/5xx response so an error page is
    never parsed as if it were real data.
    """
    response = requests.get(url, headers=headers, timeout=request_timeout)
    response.raise_for_status()
    return etree.HTML(response.text)


def _first(node, path):
    """Return the first result of XPath *path* on *node*.

    Raises IndexError (as plain ``[0]`` indexing would) when nothing
    matches, but names the failing expression in the message.
    """
    matches = node.xpath(path)
    if not matches:
        raise IndexError('XPath %r matched nothing' % path)
    return matches[0]


def _summary_cell(doc, column):
    """Return label text + value text of the *column*-th summary cell."""
    base = '//tr[@class="c bottomline"]/td[%d]' % column
    return _first(doc, base + '/span/text()') + _first(doc, base + '/text()')


def _section(doc, position):
    """Return heading + concatenated bullet text of the *position*-th section."""
    headings = doc.xpath('//div[@class="lightblue"]/text()')
    bullet_lists = doc.xpath('//ul[@class="squareli"]')
    if position >= len(headings) or position >= len(bullet_lists):
        raise IndexError('detail page has no section at index %d' % position)
    return headings[position] + ''.join(bullet_lists[position].xpath('./li/text()'))


for i in range(1, 4):
    # The listing is paged ten postings at a time through the `start` parameter.
    list_page = _fetch_tree(list_url.format((i - 1) * 10))

    # NOTE(review): the slice drops the first two rows and the last row of the
    # table, presumably header/pager rows -- confirm that row index 1 is not
    # itself a posting, otherwise one posting per page is being skipped.
    tr_list = list_page.xpath('//table[@class="tablelist"]/tr')[2:-1]
    for res_tr in tr_list:
        zhiwei = _first(res_tr, './td[1]/a').text
        zhiwei_url = t_url + _first(res_tr, './td[1]/a/@href')

        detail = _fetch_tree(zhiwei_url)
        # Summary row: the three cells are stored as place, category, headcount.
        zhiwei_place = _summary_cell(detail, 1)
        zhiwei_leibie = _summary_cell(detail, 2)
        zhiwei_renshu = _summary_cell(detail, 3)
        # First section is stored as duties (zhize), second as requirements (yaoqiu).
        zhiwei_zhize = _section(detail, 0)
        zhiwei_yaoqiu = _section(detail, 1)

        data = (zhiwei, zhiwei_place, zhiwei_leibie, zhiwei_renshu, zhiwei_zhize, zhiwei_yaoqiu)
        sq.execute_modify_mysql(sql, data)
    print('第%d页存储完毕'% i)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值