爬虫练习

腾讯招聘

import requests,os
from lxml import etree
from  pymysql_1 import mysql_a
from urllib import parse

# Crawl the first three pages of Tencent's job search results for "python",
# follow each posting's detail link, and store the title, location, headcount
# and first requirement line of every posting via the project's MySQL helper.
#
# Relative detail link as it appears in the listing:
#   position_detail.php?id=43011&keywords=python&tid=0&lid=0
# Resolved detail URL:
#   https://hr.tencent.com/position_detail.php?id=43505&keywords=python&tid=0&lid=0

# Listing URL template; %s is replaced with the result offset.
base_url = 'https://hr.tencent.com/position.php?keywords=python&lid=0&tid=0&start=%s#a'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}
bd = mysql_a()
sql = 'insert into tengxun(title,address,num,yaoqiu) values(%s,%s,%s,%s)'

# Seconds to wait on a request before giving up; without a timeout a stalled
# connection would block the crawl indefinitely.
_REQUEST_TIMEOUT = 10


def _fetch_html(url):
    """Download *url* and return its parsed lxml tree.

    Returns None when the request fails or the response body is empty, so
    that the caller can skip the page instead of aborting the crawl.
    """
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print('request failed for %s: %s' % (url, exc))
        return None
    if not response.text.strip():
        return None
    return etree.HTML(response.text)


def _first(nodes):
    """Return the first item of an XPath result list, or None if it is empty."""
    return nodes[0] if nodes else None


for page in range(0, 3):
    # The listing is addressed by result offset, in steps of 10 per page.
    offset = page * 10
    html_ele = _fetch_html(base_url % offset)
    if html_ele is None:
        continue

    for row in html_ele.xpath('//table[@class="tablelist"]/tr'):
        href = row.xpath('./td/a/@href')
        if not href:
            # Rows without a link carry no posting to follow.
            continue

        # Listing hrefs are relative; resolve them against the listing URL.
        xiang_url = parse.urljoin(base_url, href[0])
        html_el = _fetch_html(xiang_url)
        if html_el is None:
            continue

        for table in html_el.xpath('//div[@id="position_detail"]/div/table'):
            # Job title
            title_cell = _first(table.xpath('./tr[1]/td'))
            # Work location
            address = _first(table.xpath('./tr[2]/td[1]/text()'))
            # Headcount
            num = _first(table.xpath('./tr[2]/td[3]/text()'))
            # Job requirements.
            # NOTE(review): only the first <li> is stored, as in the original;
            # confirm whether all requirement items should be kept.
            yaoqiu = _first(table.xpath('./tr[4]/td[1]/ul/li/text()'))

            if title_cell is None or address is None or num is None or yaoqiu is None:
                # A missing field used to raise IndexError and stop the whole
                # crawl; skip the incomplete posting instead.
                print('skipping incomplete posting: %s' % xiang_url)
                continue

            data = (title_cell.text, address, num, yaoqiu)
            bd.execute_modify_mysql(sql, data)


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值