import requests
from lxml import etree
import pymysql
class MysqlHelper(object):
    """Thin wrapper around a pymysql connection for executing write statements."""

    def __init__(self, host='127.0.0.1', user='root', password='123456',
                 port=3306, database='py101', charset='utf8'):
        # Defaults match the original hard-coded values, so existing
        # callers (`MysqlHelper()`) keep working unchanged.
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  port=port, database=database, charset=charset)
        self.cursor = self.db.cursor()

    def mysql_do(self, sql, params=None):
        """Execute a write statement and commit.

        sql: the SQL statement, optionally with %s placeholders.
        params: optional sequence bound to the placeholders — prefer this
        over string-formatting values into `sql` (SQL-injection safe).
        """
        self.cursor.execute(sql, params)
        self.db.commit()

    def __del__(self):
        # If __init__ failed (e.g. connection refused), `cursor`/`db` were
        # never set and the original code raised AttributeError here.
        try:
            self.cursor.close()
            self.db.close()
        except Exception:
            pass
# 1. Collect detail-page URLs from the listing pages.
def a(pages=4):
    """Crawl the Tencent HR job-listing pages and store every posting.

    pages: number of 10-result listing pages to fetch (default 4,
    matching the original hard-coded ``range(0, 4)``).
    """
    # Built once, not per iteration: the original re-created both the
    # headers dict AND a fresh MysqlHelper (a new DB connection) on
    # every page of the loop.
    headers = {
        'Cookie': 'PHPSESSID=13k2chbffttgbajagbgoivu5v1; pgv_pvi=8645490688; pgv_si=s654212096',
        'Host': 'hr.tencent.com',
        'Referer': 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start=10',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    mysql_ = MysqlHelper()
    for i in range(pages):
        # `start` paginates in steps of 10 results.
        url = ('https://hr.tencent.com/position.php?lid=&tid='
               '&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D'
               '&start={}').format(i * 10)
        response = requests.get(url, headers=headers)
        html_ele = etree.HTML(response.text)
        row_list = html_ele.xpath('//table[@class="tablelist"]/tr')
        # First <tr> is the column header, last <tr> is the pager — skip both.
        for row in row_list[1:-1]:
            href = row.xpath('./td[1]/a/@href')[0]
            print(href)
            particulars_url = 'https://hr.tencent.com/' + href  # detail-page URL
            print(particulars_url)
            b(particulars_url, mysql_)
def b(particulars_url, mysql):
    """Fetch one job-detail page and insert one DB row per duty bullet.

    particulars_url: absolute URL of the detail page.
    mysql: a connected MysqlHelper instance (its cursor/db are used directly
    so the insert can be parameterized).
    """
    headers = {
        'Cookie': 'PHPSESSID=13k2chbffttgbajagbgoivu5v1; pgv_pvi=8645490688; pgv_si=s654212096',
        'Host': 'hr.tencent.com',
        'Referer': 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    response = requests.get(particulars_url, headers=headers)
    print(response)
    html_ele = etree.HTML(response.text)

    # All fields live in the single detail table; hoist the common prefix.
    table = '//table[@class="tablelist textl"]'
    position = html_ele.xpath(table + '/tr[1]/td/text()')[0]
    print(position)
    site = html_ele.xpath(table + '/tr[2]/td[1]/text()')[0]
    print(site)
    category = html_ele.xpath(table + '/tr[2]/td[2]/text()')[0]
    print(category)
    people = html_ele.xpath(table + '/tr[2]/td[3]/text()')[0]
    print(people)
    duty_list = html_ele.xpath(table + '/tr[3]/td/ul/li/text()')

    # SECURITY/CORRECTNESS FIX: the original interpolated repr(...) of
    # scraped (untrusted) page text straight into the SQL string, which is
    # not a valid escaping mechanism and is injection-prone. Use pymysql's
    # parameter binding instead, committing once after all rows.
    sql = ('insert into p222(position,site,category,people,duty)'
           'values(%s,%s,%s,%s,%s)')
    for duty in duty_list:
        print(duty)
        mysql.cursor.execute(sql, (position, site, category, people, duty))
    mysql.db.commit()
# Entry point: crawl the listing pages and persist job details to MySQL.
if __name__ == '__main__':
    a()
# 爬虫--腾讯招聘 (Crawler — Tencent recruitment)
# Blog-page residue, not code: "最新推荐文章于 2024-08-03 19:27:22 发布" (published 2024-08-03 19:27:22)