Python 爬取智联招聘信息

分享今天写的一个爬取智联招聘信息的爬虫,使用了 requests 和 re 模块。代码没有写注释,但都比较简单,不难理解。

这是爬取的信息: python爬取智联招聘信息

以下是源码部分。复制过来后缩进丢失了,请大家自行补全缩进;代码逻辑比较简单。
# -*- coding: utf-8 -*-
import requests
import re
from itertools import izip
from json import dumps
from urllib import quote

# Request headers sent with every page fetch; the User-Agent string is that
# of Chrome 62 on 64-bit Windows.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/62.0.3202.89 Safari/537.36'
    ),
}


def write_file(all_info):
    """Append every record in *all_info* to ``info.json`` as indented JSON.

    :param all_info: iterable of JSON-serialisable records; it is consumed
        exactly once, so a generator is fine.

    The file is opened in append mode, so repeated calls accumulate output.
    Each record is written as its own 4-space-indented JSON document followed
    by a newline; the file is therefore a sequence of JSON documents, not a
    single JSON value.
    """
    # Local import: ``io`` is not among the file-level imports.
    import io

    # ``io.open`` with an explicit encoding accepts non-ASCII text on both
    # Python 2 and Python 3, and the ``with`` block closes the file even if
    # serialisation fails part-way through.
    with io.open('info.json', 'a', encoding='utf-8') as fp:
        for info in all_info:
            # ``encoding='utf-8'`` is already the Python 2 default for
            # ``dumps`` and is rejected by Python 3, so it is not passed.
            text = dumps(info, ensure_ascii=False, sort_keys=False, indent=4)
            # Python 2 may hand back a byte string; the text-mode file
            # only accepts unicode.
            if isinstance(text, bytes):
                text = text.decode('utf-8')
            fp.write(text)
            fp.write(u'\n')


def get_html(work, where, page_num=1):
    """Fetch one page of Zhaopin job-search results and return it as text.

    :param work: job keyword; sent as the ``kw`` query parameter.
    :param where: work location (e.g. Beijing); sent as the ``jl`` query
        parameter.
    :param page_num: result page number, sent as ``p``; defaults to 1.
    :return: the response body as decoded by ``requests`` (``Response.text``).
    :raises requests.exceptions.RequestException: if the request fails or
        times out.
    """
    # Percent-encode the user-supplied values so non-ASCII text is URL-safe.
    where = quote(where)
    work = quote(work)

    url = 'http://sou.zhaopin.com/jobs/searchresult.ashx?jl=%s&kw=%s&sm=0&p=%s' % (where, work, page_num)

    # Without an explicit timeout ``requests`` waits indefinitely on an
    # unresponsive server.
    return requests.get(url, headers=headers, timeout=10).text


def get_info(response):
    """Yield one dict per job listing extracted from a result page.

    :param response: text of a search-result page, as returned by
        ``get_html``.
    :return: generator of dicts with the keys ``salary``, ``work_locate``,
        ``company``, ``work_name``, ``work_paticuler_info`` and
        ``company_info``. Each yielded dict is a fresh object.

    The six field lists are paired positionally and iteration stops at the
    shortest list, so a field missing from one listing shifts or truncates
    the remaining records.

    NOTE(review): the HTML-tag portions of the six search patterns below were
    stripped when this code was pasted (three of them are now identical).
    They are transcribed exactly as they survive and must be restored from
    the original source before the extracted values are meaningful.
    """
    salaries = re.findall(r'\n(.*?)\n', response)
    locations = re.findall(r'\n(.*?)\n', response)
    companies = re.findall(r'\n(.*?)\n', response)
    work_names = re.findall(r'(.*?)', response)
    work_details = re.findall(r'', response)
    company_details = re.findall(r'\n', response)

    # ``re.findall`` already returns lists, so the built-in ``zip`` is
    # sufficient and works on both Python 2 and Python 3.
    rows = zip(salaries, locations, companies, work_names,
               work_details, company_details)
    for salary, locate, company, work, work_info, company_info in rows:
        # Build a new dict per listing: re-yielding one shared dict would make
        # every record collected by the caller alias the last listing.
        yield {
            'salary': salary,
            'work_locate': locate,
            # Drop any markup nested inside the captured text.
            'company': re.sub(r'<.*?>', '', company),
            'work_name': re.sub(r'<.*?>', '', work),
            'work_paticuler_info': work_info,
            'company_info': company_info,
        }


def run():
    """Prompt for a job keyword and a location, then scrape and save results.

    Only the first result page is fetched, because ``get_html`` is called
    with its default ``page_num``.
    """
    work = raw_input(u"请输入你想要查询的工作:")
    where = raw_input(u"请输入你要查询的工作地点:")
    html = get_html(work, where)
    # ``get_info`` is a generator, so records are parsed as ``write_file``
    # consumes them.
    work_info = get_info(html)
    write_file(work_info)


# Start the interactive scraper only when executed as a script, not on import.
if __name__ == '__main__':
    run()


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 13
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 13
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值