爬虫:腾讯招聘

#mysqlhelper.py
import pymysql

class MysqlHelper(object):
    """Small wrapper around one pymysql connection and one cursor.

    The connection is opened when the object is constructed and released by
    ``close()`` (also invoked from ``__del__`` as a safety net).

    Args:
        host: MySQL server host name or address.
        port: MySQL server TCP port.
        user: Account used to log in.
        password: Password for ``user``.
        database: Schema selected after connecting.
        charset: Connection character set.

    The defaults reproduce the values that used to be hard-coded, so
    ``MysqlHelper()`` keeps connecting exactly as before.
    """

    def __init__(self, host='127.0.0.1', port=3306, user='root',
                 password='123456', database='py1011', charset='utf8'):
        # Define both attributes first so close()/__del__ stay safe even when
        # pymysql.connect() raises and the object is only half-built.
        self.db = None
        self.cursor = None
        self.db = pymysql.connect(host=host, port=port, user=user,
                                  password=password, database=database,
                                  charset=charset)
        self.cursor = self.db.cursor()

    def execute_modify_sql(self, sql, data=None):
        """Execute one modifying statement and commit it.

        Args:
            sql: SQL text, using ``%s`` placeholders for parameters.
            data: Parameters passed to ``cursor.execute`` together with
                ``sql``; ``None`` when the statement has no placeholders.

        Raises:
            Exception: Whatever ``execute`` or ``commit`` raised; the
                transaction is rolled back before the error is re-raised.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; calling it again is a no-op."""
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Drop the references first so a repeated call finds nothing to close.
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            if db is not None:
                db.close()

    def __del__(self):
        # Finalizers cannot report errors usefully, and this may run during
        # interpreter shutdown, so cleanup here is strictly best-effort.
        try:
            self.close()
        except Exception:
            pass

if __name__ == '__main__':
    # Manual smoke test: constructing the helper opens the MySQL connection
    # configured in MysqlHelper.__init__.
    conn = MysqlHelper()
    # Disabled example insert, kept for reference.
    # NOTE(review): ('huzeqi hehehe') is a parenthesized string, not a
    # one-element tuple; add a trailing comma if a tuple is intended.
    # conn.execute_modify_sql('insert into lianjiaxinxi(title) VALUE (%s)', data=('huzeqi hehehe'))
import requests
import re
from lxml import etree
import mysqlhelper

# Scrape job postings from hr.tencent.com and store each one in the
# ``tengxun`` MySQL table through the shared MysqlHelper connection.
myhelper = mysqlhelper.MysqlHelper()
sql = 'INSERT INTO tengxun (title, location_t, type_t, renshu_t,zhize,yaoqiu) VALUES (%s, %s, %s, %s, %s, %s)'

# The same browser-like User-Agent is sent with every request, so build the
# header dict once instead of once per request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'
}

# Seconds to wait on the server; without a timeout requests.get can block
# indefinitely.
REQUEST_TIMEOUT = 10

# Listing page URL. ``{start}`` is the paging offset. str.format is used
# because the URL already contains percent-encoded bytes that the ``%``
# operator would try to interpret as format specifiers.
LIST_URL = 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start={start}#a'

# Captures the relative href of every detail-page link on a listing page.
DETAIL_LINK_RE = re.compile('<a target="_blank" href="(.*?)">')

# Stored as the requirements text when an <li> in the requirements list has
# no direct text (the only failure the original try/except could hit).
DEFAULT_YAOQIU = '本科学历'+'\n'+'相关工作经验1年以上'


def _summary_cell(html_ele, column):
    """Return the ``<span>`` text plus the first text node of one summary cell.

    Args:
        html_ele: Parsed detail page (result of ``etree.HTML``).
        column: 1-based index of the ``<td>`` inside the page's ``tr[2]`` row.

    Raises:
        IndexError: If the cell has no ``<span>`` or no direct text node.
    """
    cell = '//tr[2]/td[%d]' % column
    label = html_ele.xpath(cell + '/span')[0].text
    value = html_ele.xpath(cell + '/text()')[0]
    return label + value


# Listing pages are addressed by offset: 0, 10, 20, 30.
for i in range(0, 40, 10):
    # The offset used to sit inside the string literal ('..."%s"%i#a') and
    # was never substituted, so every iteration requested the same URL.
    url = LIST_URL.format(start=i)

    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    html = response.text

    zhiwei_url_list = DETAIL_LINK_RE.findall(html)

    for zhiwei_item in zhiwei_url_list:
        zhiwei_url = 'https://hr.tencent.com/' + zhiwei_item

        response = requests.get(zhiwei_url, headers=headers,
                                timeout=REQUEST_TIMEOUT)
        html_ele = etree.HTML(response.text)

        title = html_ele.xpath('//tr[@class="h"]/td')[0].text
        print(title)

        # The three cells of tr[2] each hold a <span> label followed by text.
        location_t = _summary_cell(html_ele, 1)
        print(location_t)

        type_t = _summary_cell(html_ele, 2)
        print(type_t)

        renshu_t = _summary_cell(html_ele, 3)
        print(renshu_t)

        # tr[3]: heading <div> followed by a <ul> of items (column ``zhize``).
        gongzuozhize = html_ele.xpath('//tr[3]/td/div')[0].text
        print(gongzuozhize)
        zhizeli_list = html_ele.xpath('//tr[3]/td/ul/li')
        # Items without direct text (.text is None) are skipped instead of
        # aborting the whole crawl with a TypeError.
        zhize = ''.join(
            li.text + '\n' for li in zhizeli_list if li.text is not None
        )
        print(zhize)

        # tr[4]: same layout (column ``yaoqiu``).
        gongzuoyaoqiu = html_ele.xpath('//tr[4]/td/div')[0].text
        print(gongzuoyaoqiu)
        yaoqiuli_list = html_ele.xpath('//tr[4]/td/ul/li')
        try:
            yaoqiu = ''.join(li.text + '\n' for li in yaoqiuli_list)
        except TypeError:
            # An item had no direct text; keep the original fallback value,
            # but no longer swallow unrelated errors with a bare except.
            yaoqiu = DEFAULT_YAOQIU
        print(yaoqiu)
        print('-'*50)

        data = (title, location_t, type_t, renshu_t, zhize, yaoqiu)
        myhelper.execute_modify_sql(sql, data)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值