import requests
from lxml import etree
import pymysql
class mysql_conn():
    """Small wrapper around one PyMySQL connection and a cursor opened on it.

    The connection targets the ``wang`` database on ``127.0.0.1`` using the
    ``root``/``root`` credentials hard-coded in ``__init__``.

    Attributes:
        db: The open connection, or ``None`` before connecting / after close.
        cursor: A cursor created from ``db``, or ``None`` before connecting.
    """

    def __init__(self):
        # Pre-set both attributes so close()/__del__ stay safe even when
        # connect() below raises and the instance is only half-built.
        self.db = None
        self.cursor = None
        # Keyword arguments: PyMySQL 1.0+ no longer accepts the connection
        # settings positionally.
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='root',
            database='wang',
        )
        self.cursor = self.db.cursor()

    def execute_modify_mysql(self, sql, data):
        """Execute one data-modifying statement and commit it.

        Args:
            sql: Statement text containing the driver's placeholders.
            data: Values bound to the placeholders by the driver.

        Raises:
            Exception: Whatever the driver raised; the pending transaction is
                rolled back before the error is re-raised.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the connection if it is open; safe to call repeatedly."""
        db = getattr(self, 'db', None)
        if db is not None:
            # Clear the attribute first so a second call becomes a no-op.
            self.db = None
            db.close()

    def __del__(self):
        # getattr-based close() tolerates instances whose __init__ never ran
        # or failed before ``db`` was assigned.
        self.close()
# Parameterized INSERT for one scraped job posting; the six values are bound
# by the database driver rather than formatted into the statement.
sql = 'insert into tengxun(zhiwei,place,leibie,renshu,zhize,yaoqiu) values(%s,%s,%s,%s,%s,%s)'
sq = mysql_conn()

# Request settings that do not change between pages, built once up front.
t_url = 'https://hr.tencent.com/'
list_url = 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start={}#a'
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}
# Seconds to wait on the server; without a timeout requests can block forever.
request_timeout = 10


def _fetch_doc(page_url):
    """Download ``page_url`` and return it parsed as an lxml HTML tree.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so an
            error page is never parsed as if it were real content.
    """
    response = requests.get(page_url, headers=headers, timeout=request_timeout)
    response.raise_for_status()
    return etree.HTML(response.text)


def _labelled_cell(detail_doc, column):
    """Return the label text plus the value text of one summary-row cell.

    ``column`` is the 1-based ``td`` position inside the ``c bottomline`` row;
    the label comes from the cell's ``span`` and the value from the cell's own
    text node.
    """
    cell = '//tr[@class="c bottomline"]/td[%d]' % column
    label = detail_doc.xpath(cell + '/span/text()')[0]
    value = detail_doc.xpath(cell + '/text()')[0]
    return label + value


# Walk the first three listing pages; the ``start`` offset grows by 10 per page.
for i in range(1, 4):
    url = list_url.format((i - 1) * 10)
    html_ele = _fetch_doc(url)
    # NOTE(review): [2:-1] drops the first two rows and the last row of the
    # table; confirm against the page markup that no job row is skipped.
    tr_list = html_ele.xpath('//table[@class="tablelist"]/tr')[2:-1]
    for res_tr in tr_list:
        zhiwei = res_tr.xpath('./td[1]/a')[0].text
        zhiwei_url = t_url + res_tr.xpath('./td[1]/a/@href')[0]
        html_ele2 = _fetch_doc(zhiwei_url)

        # Summary row of the detail page: cells 1-3 feed the place, leibie
        # and renshu columns respectively.
        zhiwei_place = _labelled_cell(html_ele2, 1)
        zhiwei_leibie = _labelled_cell(html_ele2, 2)
        zhiwei_renshu = _labelled_cell(html_ele2, 3)

        # The first heading/list pair feeds zhize and the second feeds yaoqiu;
        # each XPath is evaluated once and then indexed.
        headings = html_ele2.xpath('//div[@class="lightblue"]/text()')
        bullet_lists = html_ele2.xpath('//ul[@class="squareli"]')
        zhiwei_zhize = headings[0] + ''.join(bullet_lists[0].xpath('./li/text()'))
        zhiwei_yaoqiu = headings[1] + ''.join(bullet_lists[1].xpath('./li/text()'))

        data = (zhiwei,zhiwei_place,zhiwei_leibie,zhiwei_renshu,zhiwei_zhize,zhiwei_yaoqiu)
        sq.execute_modify_mysql(sql, data)
    print('第%d页存储完毕'% i)
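# Purpose of this script: fetch Tencent job-posting information
# (original title: 获取腾讯职位的招聘信息).
# Web-page residue kept for reference: "最新推荐文章于 2021-08-20 10:51:10 发布"
# (i.e. "latest recommended article published 2021-08-20 10:51:10").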