# Tencent recruitment (腾讯招聘): scrape Python job postings into MySQL.
import requests,os
from lxml import etree
from pymysql_1 import mysql_a
from urllib import parse
# Example of a relative detail-page link (as it appears in a listing href):
#   position_detail.php?id=43011&keywords=python&tid=0&lid=0
# Examples of the corresponding absolute detail-page URLs:
#   https://hr.tencent.com/position_detail.php?id=43505&keywords=python&tid=0&lid=0
#   https://hr.tencent.com/position_detail.php?id=43489&keywords=python&tid=0&lid=0

# Listing-page URL template; the %s placeholder supplies the `start` query value.
base_url = (
    'https://hr.tencent.com/position.php'
    '?keywords=python&lid=0&tid=0&start=%s#a'
)

# Browser-like User-Agent header sent with the HTTP requests.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/68.0.3440.106 Safari/537.36'
    ),
}

# Project-local database helper from pymysql_1
# (presumably wraps a MySQL connection -- confirm against that module).
bd = mysql_a()

# Parameterised INSERT into the `tengxun` table: title, address, num, yaoqiu.
sql = (
    'insert into tengxun(title,address,num,yaoqiu) '
    'values(%s,%s,%s,%s)'
)
# Crawl the first three listing pages, follow every posting's detail link and
# insert one row per posting (title, address, head-count, requirement) via `bd`.

# Seconds to wait for a response; without a timeout requests.get() can block
# indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 10


def _fetch_tree(page_url):
    """Download *page_url* and return the parsed lxml tree, or None on failure.

    Network errors, timeouts and non-2xx responses are reported on stdout and
    turned into None so that one bad page does not abort the whole crawl.
    """
    try:
        resp = requests.get(page_url, headers=headers, timeout=_REQUEST_TIMEOUT)
        resp.raise_for_status()
    except requests.RequestException as exc:
        print('skip %s: %s' % (page_url, exc))
        return None
    return etree.HTML(resp.text)


def _first(results):
    """Return the first item of an XPath result list, or None when it is empty."""
    return results[0] if results else None


for page_index in range(3):
    # The `start` offset advances by 10 for each listing page.
    url = base_url % (page_index * 10)
    listing = _fetch_tree(url)
    if listing is None:
        continue

    for row in listing.xpath('//table[@class="tablelist"]/tr'):
        href = row.xpath('./td/a/@href')
        if not href:
            # Rows without a detail link have nothing to follow.
            continue

        # Resolve the relative link against the page that was actually
        # fetched rather than the unformatted '%s' template.
        xiang_url = parse.urljoin(url, href[0])
        detail = _fetch_tree(xiang_url)
        if detail is None:
            continue

        for table in detail.xpath('//div[@id="position_detail"]/div/table'):
            # Job title: text of the first cell in the first row.
            title_cells = table.xpath('./tr[1]/td')
            title = title_cells[0].text if title_cells else None
            # Work location.
            address = _first(table.xpath('./tr[2]/td[1]/text()'))
            # Number of openings.
            num = _first(table.xpath('./tr[2]/td[3]/text()'))
            # Job requirement.
            # NOTE(review): only the first <li> is stored, as in the original
            # code -- confirm whether all bullet points should be kept.
            yaoqiu = _first(table.xpath('./tr[4]/td[1]/ul/li/text()'))

            data = (title, address, num, yaoqiu)
            if None in data:
                # A missing field used to raise IndexError and stop the crawl;
                # skip the incomplete record instead.
                print('skip incomplete posting: %s' % xiang_url)
                continue
            bd.execute_modify_mysql(sql, data)