import csv

import requests
# HTTP headers attached to every request made by this script: a desktop
# Chrome user agent, plus the search-results page as the Referer (the
# request's claimed origin; possibly checked as anti-hotlinking protection?).
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/71.0.3578.98 Safari/537.36"
    ),
    "Referer": (
        "https://www.lagou.com/jobs/list_python%E7%88%AC%E8%99%AB"
        "?labelWords=&fromSearch=true&suginput="
    ),
}
# Search-results page; visited first so the site issues fresh cookies.
LIST_URL = 'https://www.lagou.com/jobs/list_python%E7%88%AC%E8%99%AB?labelWords=&fromSearch=true&suginput='
# Ajax endpoint that returns the job listings as JSON.
AJAX_URL = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
# Default output file; rows are appended to it.
CSV_PATH = r'C:\Users\DELL\Desktop\python_wd\拉钩.csv'
# Keys copied from each job record, in CSV column order:
# company name, position, company size, salary.
CSV_FIELDS = ('companyFullName', 'positionName', 'companySize', 'salary')
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def build_form_data(page):
    """Return the POST form for one page of search results.

    Args:
        page: Page number, sent as the ``pn`` field.

    Returns:
        A new dict with the ``first``, ``pn`` and ``kd`` form fields.
    """
    # NOTE(review): both string values start with a space, exactly as in the
    # original script -- confirm whether the server requires or merely
    # tolerates that before trimming them.
    return {
        'first': ' true',
        'pn': page,
        'kd': ' python爬虫',
    }


def get_cookie(url):
    """Fetch ``url`` with a fresh session and return the cookies it received.

    The cookies keep changing, so callers request a new set before each
    Ajax call.

    Args:
        url: Page to visit in order to be issued cookies.

    Returns:
        The session's cookies as a plain ``dict``.
    """
    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        return session.cookies.get_dict()


def save_jobs(jobs, path=CSV_PATH):
    """Append one CSV row per job record to ``path``.

    Args:
        jobs: Iterable of dicts, each containing every key in ``CSV_FIELDS``.
        path: Destination CSV file, opened in append mode.

    Raises:
        KeyError: If a job record lacks one of the ``CSV_FIELDS`` keys.
    """
    if not jobs:
        # Nothing to write; do not create an empty file.
        return
    # newline='' lets the csv module control line endings itself.
    with open(path, 'a', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)  # quotes fields that contain commas or quotes
        for job in jobs:
            row = [job[field] for field in CSV_FIELDS]
            print("\t正在保存公司信息:", row[0])
            writer.writerow(row)


def get_mess(url1, cookies_1, data, path=CSV_PATH):
    """Fetch one page of job listings and append them to the CSV file.

    Args:
        url1: Ajax endpoint to POST to.
        cookies_1: Cookie dict, e.g. the return value of ``get_cookie``.
        data: POST form; its ``pn`` entry is the page number being fetched.
        path: Destination CSV file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON body lacks the expected
            ``content -> positionResult -> result`` structure.
    """
    print('正在获取第{}页数据'.format(data.get('pn')))
    resp = requests.post(
        url1,
        headers=headers,
        data=data,
        cookies=cookies_1,
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    payload = resp.json()
    try:
        jobs = payload['content']['positionResult']['result']
    except KeyError as err:
        # Keep the original exception type, but show what actually came back.
        raise KeyError(
            'unexpected response structure: {}'.format(str(payload)[:200])
        ) from err
    save_jobs(jobs, path)


def main(first_page=1, last_page=10):
    """Scrape result pages ``first_page`` through ``last_page`` inclusive.

    A fresh cookie set is requested before every page.
    """
    for page in range(first_page, last_page + 1):
        cookies_1 = get_cookie(LIST_URL)
        get_mess(AJAX_URL, cookies_1, build_form_data(page))


if __name__ == '__main__':
    main()
# 拉勾网翻页方法
# 最新推荐文章于 2020-05-15 03:11:19 发布