# 爬取拉勾网求职信息(Python职位) — scrape Lagou job postings (Python positions)
import datetime
import time
import requests
import xlwt
def List(City, Pname, Cname, Csize, Salary, WorkYear, Education,
         PositionAdvantage, LastLogin, Hitags, *, target=None):
    """Append one posting's ten fields, in argument order, to a flat list.

    The values are added as ten consecutive entries (not as a nested row),
    so the receiving list grows by exactly ten items per call.

    Args:
        City, Pname, Cname, Csize, Salary, WorkYear, Education,
        PositionAdvantage, LastLogin, Hitags: the field values to store;
            they are stored as given, without conversion.
        target: optional list to extend.  When omitted, the module-level
            accumulator named ``list`` is used, which is what existing
            positional callers rely on.

    Returns:
        None. The receiving list is modified in place.
    """
    fields = (City, Pname, Cname, Csize, Salary, WorkYear, Education,
              PositionAdvantage, LastLogin, Hitags)
    if target is None:
        # ``list`` here is the module-level accumulator assigned elsewhere in
        # this file; it shadows the builtin type of the same name.
        target = list
    target.extend(fields)
# Flat accumulator filled by ``List()``: ten consecutive entries per posting.
# NOTE: this name shadows the built-in ``list`` for the whole module, so the
# code below never calls ``list(...)`` or uses it in an ``isinstance`` check.
list = []

# Search landing page; fetched first so the session holds its cookies.
baseurl = 'https://www.lagou.com/jobs/list_python/p-city_0?px=default'
# Endpoint the search form is POSTed to; its response is parsed as JSON.
url = 'https://www.lagou.com/jobs/positionAjax.json?px=default&needAddtionalResult=false'

# Request headers are identical for every page, so they are built once.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0',
    'Referer': 'https://www.lagou.com/jobs/list_python/p-city_0?px=default',
}
# Keys read from each posting, in the same order as COLUMNS.
FIELD_KEYS = (
    'city', 'positionName', 'companyFullName', 'companySize', 'salary',
    'workYear', 'education', 'positionAdvantage', 'lastLogin', 'hitags',
)
# Header row of the spreadsheet.
COLUMNS = ("所在城市","职位名称","公司名称","公司人数","薪资","工作经验","教育条件","职位诱惑","发布时间","其他")
# Nested keys leading from the JSON reply to the list of postings.
RESULT_PATH = ('content', 'positionResult', 'result')
PAGES = range(1, 11)
REQUEST_TIMEOUT = 10  # seconds; without one a stalled connection blocks forever
PAGE_DELAY = 2  # seconds to pause after each page


def extract_results(payload):
    """Return the postings found under content/positionResult/result.

    Args:
        payload: the decoded JSON reply.

    Returns:
        The value stored at the end of RESULT_PATH, or an empty list when any
        level of the path is missing, is ``None``, or is not a dict.
    """
    node = payload
    for key in RESULT_PATH:
        if not isinstance(node, dict):
            return []
        node = node.get(key)
    return node or []


def fetch_page(page):
    """POST the search form for one result page and return its postings.

    A new session is opened for each page: it first GETs ``baseurl`` so the
    cookies from that response accompany the POST, and it is closed again
    when the page has been read.

    Args:
        page: page number sent as the ``pn`` form field.

    Returns:
        A list of posting dicts; empty when the reply carries no results.

    Raises:
        requests.RequestException: on connection errors or timeouts.
        ValueError: if the reply body is not valid JSON.
    """
    form = {
        'first': 'true',
        'pn': page,
        'kd': 'python',
    }
    with requests.Session() as session:
        session.get(baseurl, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        reply = session.post(url, headers=HEADERS, data=form, timeout=REQUEST_TIMEOUT)
        return extract_results(reply.json())


def group_rows(flat, width):
    """Split a flat sequence into consecutive rows of ``width`` items.

    Only complete rows are returned; a shorter trailing remainder is dropped.
    """
    return [flat[start:start + width]
            for start in range(0, len(flat) - width + 1, width)]


def to_cell(value):
    """Return a value suitable for a single spreadsheet cell.

    Strings, ``None`` and other non-iterable values pass through unchanged.
    Any other iterable is joined into one comma-separated string so its parts
    stay distinguishable in the cell.
    NOTE(review): presumably only ``hitags`` arrives as a list of strings;
    confirm against a live API reply.
    """
    if isinstance(value, (str, bytes)) or not hasattr(value, '__iter__'):
        return value
    return ','.join(str(part) for part in value)


def save_workbook(rows, path):
    """Write the header row followed by ``rows`` to an .xls file at ``path``."""
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet('Python招聘')
    for col_idx, title in enumerate(COLUMNS):
        sheet.write(0, col_idx, title)
    # Data starts on the second sheet row; the first holds the titles.
    for row_idx, row in enumerate(rows, start=1):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, to_cell(value))
    book.save(path)


def main():
    """Fetch every page, print and collect the postings, then save them."""
    for page in PAGES:
        try:
            jobs = fetch_page(page)
        except (requests.RequestException, ValueError) as exc:
            # Keep what has been collected so far instead of aborting the run.
            print(f'page {page}: request failed ({exc}); skipped')
            jobs = []
        if not jobs:
            print(f'page {page}: no postings returned')
        for item in jobs:
            fields = tuple(item.get(key) for key in FIELD_KEYS)
            print('--'.join(f'{value}' for value in fields))
            List(*fields)
        time.sleep(PAGE_DELAY)
    print(len(list))

    # ``List()`` stores the fields flat, so rebuild one row per posting.
    new_list = group_rows(list, len(COLUMNS))
    print(len(new_list))

    now = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    save_workbook(new_list, f'拉钩网_Python{now}.xls')


if __name__ == '__main__':
    main()