python爬虫兼职-python爬虫爬取拉勾招聘网

# -*- coding: utf-8 -*-

"""

Created on Mon Sep 7 21:44:39 2020

@author: ASUS

"""

外汇常见问题https://www.kaifx.cn/lists/question/

import requests

import time

import json

import xlwt

# Workbook and sheet shared by the whole script; main() writes data rows into
# `mysheet` and saves `workbook`.
# The encoding name previously carried a stray leading space (' utf-8');
# the canonical codec name is used instead.
workbook = xlwt.Workbook(encoding='utf-8')

mysheet = workbook.add_sheet('mysheet')

# Header row (row 0): one cell per column, in sheet order.
# 'positionId' and 'positionName' intentionally appear twice (columns 0/23
# and 1/25) so the sheet layout stays exactly as it was.
for _col, _title in enumerate((
    'positionId',
    'positionName',
    'companyId',
    'companyFullName',
    'city',
    'companyLabelList',
    'companyLogo',
    'companyShortName',
    'companySize',
    'createTime',
    'district',
    'education',
    'financeStage',
    'firstType',
    'formatCreateTime',
    'industryField',
    'jobNature',
    'lastLogin',
    'latitude',
    'linestaion',
    'longitude',
    'matchScore',
    'positionAdvantage',
    'positionId',
    'positionLables',
    'positionName',
    'secondType',
    'skillLables',
    'stationname',
    'subwayline',
    'thirdType',
    'workYear',
)):
    mysheet.write(0, _col, _title)

def main(kd, pages, row):
    """Fetch Lagou job listings for a keyword and save them to 'py3.xls'.

    For every requested page a fresh session first visits the listing page
    (to obtain cookies) and then posts to the position AJAX endpoint. Each
    returned position is written as one row of the module-level ``mysheet``;
    the module-level ``workbook`` is saved once all pages are processed.

    Args:
        kd: Search keyword, sent as the ``kd`` form field.
        pages: Number of result pages to fetch (pages 1 through ``pages``).
        row: Sheet row index at which the first position is written.

    Raises:
        KeyError: If the response JSON lacks
            ``content.positionResult.result`` or a position lacks one of
            the expected fields.
    """
    # Listing page: visited first so the session receives cookies.
    url1 = 'https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput='

    # AJAX endpoint that returns the positions as JSON.
    url = "https://www.lagou.com/jobs/positionAjax.json?px=default&needAddtionalResult=false"

    # Request headers sent with both requests.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Referer': 'https://www.lagou.com/jobs/list_python?px=default&city=%E5%85%A8%E5%9B%BD',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'Host': 'www.lagou.com',
    }

    # JSON keys written to columns 0..31, in column order. 'positionId' and
    # 'positionName' are written twice (columns 0/23 and 1/25).
    fields = (
        'positionId',
        'positionName',
        'companyId',
        'companyFullName',
        'city',
        'companyLabelList',
        'companyLogo',
        'companyShortName',
        'companySize',
        'createTime',
        'district',
        'education',
        'financeStage',
        'firstType',
        'formatCreateTime',
        'industryField',
        'jobNature',
        'lastLogin',
        'latitude',
        'linestaion',
        'longitude',
        'matchScore',
        'positionAdvantage',
        'positionId',
        'positionLables',
        'positionName',
        'secondType',
        'skillLables',
        'stationname',
        'subwayline',
        'thirdType',
        'workYear',
    )

    # range() excludes its stop value, so use pages + 1 to really fetch
    # `pages` pages (previously only pages - 1 were fetched).
    for page in range(1, pages + 1):
        # Form data selecting the page number and the search keyword.
        data = {
            'first': 'false',
            'pn': page,
            'kd': kd,
        }

        # A new session per page; the context manager closes it afterwards.
        with requests.Session() as s:
            s.get(url=url1, headers=headers, timeout=1)
            respon = s.post(url=url, headers=headers, data=data,
                            cookies=s.cookies, timeout=3)

        # Pause between pages.
        time.sleep(1)

        result = json.loads(respon.text)
        info = result["content"]["positionResult"]["result"]
        print(len(info))

        for j in info:
            for col, key in enumerate(fields):
                mysheet.write(row, col, j[key])
            row = row + 1

    workbook.save('py3.xls')

# Script entry: read the search keyword and the page count from the user,
# then run the scraper with data rows starting at sheet row 1.
kd = input('输入关键字:')

pages = int(input('输入要爬取多少页:'))

main(kd=kd, pages=pages, row=1)

# 结果如下:

# {"resubmitToken":null,"requestId":null,"msg":null,"success":true,"content":{"hrInfoMap":{"6187967":{"userId":11765418,"phone":null,"positionName":"招聘经理",........."pageSize":15},"code":0}

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值