python月薪25k+的名企_大学毕业后,凭什么她用Python月薪25K,与我们天壤之别?...

#-*- coding: utf-8 -*-

import re

import csv

import requests

from tqdm import tqdm

from urllib.parse import urlencode

from requests.exceptions import RequestException

def get_one_page(city, keyword, region, page):

‘‘‘

获取网页html内容并返回

‘‘‘

paras = {

‘jl‘: city, # 搜索城市

‘kw‘: keyword, # 搜索关键词

‘isadv‘: 0, # 是否打开更详细搜索选项

‘isfilter‘: 1, # 是否对结果过滤

‘p‘: page, # 页数

‘re‘: region # region的缩写,地区,2005代表海淀

}

headers = {

‘User-Agent‘: ‘Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36‘,

‘Host‘: ‘sou.zhaopin.com‘,

‘Referer‘: ‘网址‘,

‘Accept‘: ‘text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8‘,

‘Accept-Encoding‘: ‘gzip, deflate, br‘,

‘Accept-Language‘: ‘zh-CN,zh;q=0.9‘

}

url = ‘网址/jobs/searchresult.ashx?‘ + urlencode(paras)

try:

# 获取网页内容,返回html数据

response = requests.get(url, headers=headers)

# 通过状态码判断是否获取成功

if response.status_code == 200:

return response.text

return None

except RequestException as e:

return None

def parse_one_page(html):

‘‘‘

解析HTML代码,提取有用信息并返回

‘‘‘

# 正则表达式进行解析

pattern = re.compile(‘(.*?).*?‘ # 匹配职位信息

(.*?).*?‘ # 匹配公司网址和公司名称

(.*?)‘, re.S) # 匹配月薪

# 匹配所有符合条件的内容

items = re.findall(pattern, html)

for item in items:

job_name = item[0]

job_name = job_name.replace(‘‘, ‘‘)

job_name = job_name.replace(‘‘, ‘‘)

yield {

‘job‘: job_name,

‘website‘: item[1],

‘company‘: item[2],

‘salary‘: item[3]

}

def write_csv_file(path, headers, rows):

‘‘‘

将表头和行写入csv文件

‘‘‘

# 加入encoding防止中文写入报错

# newline参数防止每写入一行都多一个空行

with open(path, ‘a‘, encoding=‘gb18030‘, newline=‘‘) as f:

f_csv = csv.DictWriter(f, headers)

f_csv.writeheader()

f_csv.writerows(rows)

def write_csv_headers(path, headers):

‘‘‘

写入表头

‘‘‘

with open(path, ‘a‘, encoding=‘gb18030‘, newline=‘‘) as f:

f_csv = csv.DictWriter(f, headers)

f_csv.writeheader()

def write_csv_rows(path, headers, rows):

‘‘‘

写入行

‘‘‘

with open(path, ‘a‘, encoding=‘gb18030‘, newline=‘‘) as f:

f_csv = csv.DictWriter(f, headers)

f_csv.writerows(rows)

def main(city, keyword, region, pages):

‘‘‘

主函数

‘‘‘

filename = ‘zl_‘ + city + ‘_‘ + keyword + ‘.csv‘

headers = [‘job‘, ‘website‘, ‘company‘, ‘salary‘]

write_csv_headers(filename, headers)

for i in tqdm(range(pages)):

‘‘‘

获取该页中所有职位信息,写入csv文件

‘‘‘

jobs = []

html = get_one_page(city, keyword, region, i)

items = parse_one_page(html)

for item in items:

jobs.append(item)

write_csv_rows(filename, headers, jobs)

if __name__ == ‘__main__‘:

main(‘北京‘, ‘python工程师‘, 2015, 10)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值