import requests
from lxml import etree
import pymysql
class MysqlHelper(object):
    """Thin wrapper around a pymysql connection for executing write statements."""

    def __init__(self, host='127.0.0.1', user='root', password='123456',
                 port=3306, database='py101', charset='utf8'):
        # Defaults match the original hard-coded values, so existing
        # callers (`MysqlHelper()`) keep working unchanged.
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  port=port, database=database, charset=charset)
        self.cursor = self.db.cursor()

    def mysql_do(self, sql, params=None):
        """Execute a write statement and commit.

        sql: the SQL statement, optionally with %s placeholders.
        params: optional sequence bound to the placeholders — prefer this
        over string-formatting values into `sql` (SQL-injection safe).
        """
        self.cursor.execute(sql, params)
        self.db.commit()

    def __del__(self):
        # If __init__ failed (e.g. connection refused), `cursor`/`db` were
        # never set and the original code raised AttributeError here.
        try:
            self.cursor.close()
            self.db.close()
        except Exception:
            pass
# 1. Collect detail-page URLs from the listing pages.
def a(pages=4):
    """Crawl the Tencent HR job-listing pages and store every posting.

    pages: number of 10-result listing pages to fetch (default 4,
    matching the original hard-coded ``range(0, 4)``).
    """
    # Built once, not per iteration: the original re-created both the
    # headers dict AND a fresh MysqlHelper (a new DB connection) on
    # every page of the loop.
    headers = {
        'Cookie': 'PHPSESSID=13k2chbffttgbajagbgoivu5v1; pgv_pvi=8645490688; pgv_si=s654212096',
        'Host': 'hr.tencent.com',
        'Referer': 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start=10',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    mysql_ = MysqlHelper()
    for i in range(pages):
        # `start` paginates in steps of 10 results.
        url = ('https://hr.tencent.com/position.php?lid=&tid='
               '&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D'
               '&start={}').format(i * 10)
        response = requests.get(url, headers=headers)
        html_ele = etree.HTML(response.text)
        row_list = html_ele.xpath('//table[@class="tablelist"]/tr')
        # First <tr> is the column header, last <tr> is the pager — skip both.
        for row in row_list[1:-1]:
            href = row.xpath('./td[1]/a/@href')[0]
            print(href)
            particulars_url = 'https://hr.tencent.com/' + href  # detail-page URL
            print(particulars_url)
            b(particulars_url, mysql_)
def b(particulars_url, mysql):
    """Fetch one job-detail page and insert one DB row per duty bullet.

    particulars_url: absolute URL of the detail page.
    mysql: a connected MysqlHelper instance (its cursor/db are used directly
    so the insert can be parameterized).
    """
    headers = {
        'Cookie': 'PHPSESSID=13k2chbffttgbajagbgoivu5v1; pgv_pvi=8645490688; pgv_si=s654212096',
        'Host': 'hr.tencent.com',
        'Referer': 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    response = requests.get(particulars_url, headers=headers)
    print(response)
    html_ele = etree.HTML(response.text)

    # All fields live in the single detail table; hoist the common prefix.
    table = '//table[@class="tablelist textl"]'
    position = html_ele.xpath(table + '/tr[1]/td/text()')[0]
    print(position)
    site = html_ele.xpath(table + '/tr[2]/td[1]/text()')[0]
    print(site)
    category = html_ele.xpath(table + '/tr[2]/td[2]/text()')[0]
    print(category)
    people = html_ele.xpath(table + '/tr[2]/td[3]/text()')[0]
    print(people)
    duty_list = html_ele.xpath(table + '/tr[3]/td/ul/li/text()')

    # SECURITY/CORRECTNESS FIX: the original interpolated repr(...) of
    # scraped (untrusted) page text straight into the SQL string, which is
    # not a valid escaping mechanism and is injection-prone. Use pymysql's
    # parameter binding instead, committing once after all rows.
    sql = ('insert into p222(position,site,category,people,duty)'
           'values(%s,%s,%s,%s,%s)')
    for duty in duty_list:
        print(duty)
        mysql.cursor.execute(sql, (position, site, category, people, duty))
    mysql.db.commit()
# Entry point: crawl the listing pages and persist job details to MySQL.
if __name__ == '__main__':
    a()
# 爬虫--腾讯招聘 (Crawler — Tencent recruitment)
# Blog-page residue, not code: "最新推荐文章于 2024-08-03 19:27:22 发布" (published 2024-08-03 19:27:22)