Scraping Tencent job postings with a Python crawler (static crawler)

Environment:

Windows 7, Python 3.4

 

Code (personally tested, runs correctly). The script has three parts: getJobPage reads the total job count from the search page and computes the number of listing pages; getJobOrder pulls the responsibilities and requirements from a job's detail page; getJobInfo walks the rows of one listing page and appends one record per job to tencent_job.txt.

import requests
from bs4 import BeautifulSoup
from math import ceil

header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}


# Get the number of listing pages
def getJobPage(url):
    ret = requests.get(url, headers=header)
    ret.encoding = "utf-8"  # avoid garbled characters
    html = ret.text
    soup = BeautifulSoup(html, 'html.parser')
    # Total job count, e.g. <span class="lightblue total">512</span>
    totalJob = soup.select('span[class="lightblue total"]')[0].text
    jobPage = ceil(int(totalJob) / 10)  # 10 jobs per listing page
    return jobPage


# Get the responsibilities and requirements from a job's detail page
def getJobOrder(url):
    ret = requests.get(url, headers=header)
    ret.encoding = "utf-8"  # avoid garbled characters
    html = ret.text
    soup = BeautifulSoup(html, 'html.parser')
    # Job responsibilities: first <ul class="squareli">; collapse internal
    # whitespace so each record stays on a single output line
    jobRequests = " ".join(soup.select('ul[class="squareli"]')[0].text.split())
    # Job requirements: second <ul class="squareli">
    jobOrder = " ".join(soup.select('ul[class="squareli"]')[1].text.split())
    return jobRequests, jobOrder


# Get the job info from one listing page
def getJobInfo(url):
    ret = requests.get(url, headers=header)
    ret.encoding = "utf-8"  # avoid garbled characters
    html = ret.text
    soup = BeautifulSoup(html, 'html.parser')
    jobList = soup.find_all('tr', class_=['even', 'odd'])
    # gb18030 plus errors='ignore' sidesteps characters the codec cannot encode
    with open("tencent_job.txt", "a", encoding='gb18030', errors='ignore') as myfile:
        for job in jobList:
            # Detail-page URL
            jobUrl = "https://hr.tencent.com/" + job.select('td:nth-of-type(1) > a')[0]['href']
            # Job title
            jobName = job.select('td:nth-of-type(1) > a')[0].text
            # Headcount
            jobPeople = job.select('td:nth-of-type(3)')[0].text
            # Location
            jobAddre = job.select('td:nth-of-type(4)')[0].text
            # Publication date
            jobTime = job.select('td:nth-of-type(5)')[0].text
            # Responsibilities and requirements: fetch the detail page once for both fields
            jobRequests, jobOrder = getJobOrder(jobUrl)

            # print(jobName, jobUrl, jobAddre, jobPeople, jobTime, jobRequests, jobOrder)

            tt = " ".join([jobName, jobUrl, jobAddre, jobPeople, jobTime, jobRequests, jobOrder])
            myfile.write(tt + "\n")


if __name__ == '__main__':
    mainurl = 'https://hr.tencent.com/position.php?keywords=python'
    jobPage = getJobPage(mainurl)
    print(jobPage)
    for page in range(jobPage):
        # Each listing page shows 10 jobs; start is the zero-based offset
        pageUrl = 'https://hr.tencent.com/position.php?keywords=python&start=' + str(page * 10) + '#a'
        print("Page " + str(page + 1))
        getJobInfo(pageUrl)
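
To sanity-check the result, the output file can be read back after a run. A minimal sketch, assuming the script above has finished and tencent_job.txt sits in the current working directory:

with open("tencent_job.txt", encoding="gb18030", errors="ignore") as f:
    for line in list(f)[:3]:  # peek at the first three scraped records
        print(line.strip())

Each line holds one job: title, detail URL, location, headcount, publication date, responsibilities, and requirements, separated by spaces.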

 

Reposted from: https://www.cnblogs.com/sunshine-blog/p/9295739.html
