爬虫练习

最新推荐文章于 2024-08-20 13:16:22 发布

北海以北_IT

最新推荐文章于 2024-08-20 13:16:22 发布

阅读量416

点赞数

分类专栏：爬虫

本文链接：https://blog.csdn.net/weixin_42959393/article/details/81837724

版权

爬虫专栏收录该内容

8 篇文章 0 订阅

订阅专栏

腾讯招聘

import requests,os
from lxml import etree
from  pymysql_1 import mysql_a
from urllib import parse

# Listing-page URL template; %s takes the "start" result offset.
# Detail links on a listing page are relative, for example:
#   position_detail.php?id=43011&keywords=python&tid=0&lid=0
# which resolves to:
#   https://hr.tencent.com/position_detail.php?id=43505&keywords=python&tid=0&lid=0
base_url = 'https://hr.tencent.com/position.php?keywords=python&lid=0&tid=0&start=%s#a'

# INSERT statement with one placeholder per scraped field
# (title, address, num, yaoqiu); the values are supplied at execute time.
sql = 'insert into tengxun(title,address,num,yaoqiu) values(%s,%s,%s,%s)'

# Browser-like User-Agent sent with every HTTP request.
headers = dict()
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'

# Database helper object from the project-local pymysql_1 module.
bd = mysql_a()

# Seconds to wait for the server; without a timeout requests.get can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _first(nodes):
    """Return the first item of an XPath result list, or None when it is empty."""
    return nodes[0] if nodes else None


# Walk the first three listing pages (start offsets 0, 10 and 20).
for page in range(3):
    url = base_url % (page * 10)
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    listing = etree.HTML(response.text)

    for row in listing.xpath('//table[@class="tablelist"]/tr'):
        href = row.xpath('./td/a/@href')
        if not href:
            # Rows without a link (e.g. header/footer rows) carry no posting.
            continue

        # Links are relative, so resolve them against the listing page URL.
        detail_url = parse.urljoin(url, href[0])
        detail_response = requests.get(
            detail_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        detail = etree.HTML(detail_response.text)

        for table in detail.xpath('//div[@id="position_detail"]/div/table'):
            # Position title
            title_cell = _first(table.xpath('./tr[1]/td'))
            # Work location
            address = _first(table.xpath('./tr[2]/td[1]/text()'))
            # Number of openings
            num = _first(table.xpath('./tr[2]/td[3]/text()'))
            # Job requirements
            # NOTE(review): only the first <li> is stored, as before; confirm
            # whether the whole list should be saved instead.
            yaoqiu = _first(table.xpath('./tr[4]/td[1]/ul/li/text()'))

            if title_cell is None or address is None or num is None or yaoqiu is None:
                # An unexpected page layout should not abort the whole crawl.
                print('skip incomplete posting: %s' % detail_url)
                continue

            data = (title_cell.text, address, num, yaoqiu)
            bd.execute_modify_mysql(sql, data)