说明:本脚本登录BOSS直聘,获取推荐牛人信息并筛选牛人,然后发起沟通(打招呼);目前只能发起沟通,还不能进行后续的继续交流。
图片验证码目前无法自动识别,只能将验证码图片保存到本地,用图片查看器打开之后手工输入。
一次登录后,可在会话失效前多次执行牛人推荐沟通。
import requests
from bs4 import BeautifulSoup
import json
import datetime
import re
import sys
import logging
import os
import http.cookiejar as cookielib
class Boss(object):
    """Scripted client for the BOSS Zhipin recruiter pages.

    Workflow implemented here: log in with a phone number (image captcha and
    SMS code are typed in by hand), persist the session cookies to disk, then
    page through the recommended candidates, inspect each resume and send a
    greeting to the candidates that pass the filter in ``recommend``.

    Endpoints that vary per deployment are read from the JSON config loaded
    by ``get_config`` (keys used: ``url_login_page``, ``domain``,
    ``url_sendsms``, ``url_login``, ``url_search``, ``iphone``).
    """

    # Default location of the JSON configuration file.
    _CONFIG_FILE = "./bosszhipin/config.json"
    # Cookie jar file written by ``boss_login`` and re-read by ``readcookie``.
    _COOKIE_FILE = "./basszhipincookie.txt"
    # Browser user-agent sent together with the replayed cookie.
    _USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36')

    def __init__(self):
        """Create the HTTP session and attach a file-backed LWP cookie jar."""
        # Empty until get_config() is called.
        self.config = {}
        self.sessionid = requests.session()
        self.sessionid.cookies = cookielib.LWPCookieJar(filename=self._COOKIE_FILE)

    def get_config(self, path=None):
        """Load the JSON configuration into ``self.config``.

        Args:
            path: Optional config file path; defaults to
                ``./bosszhipin/config.json``.
        """
        # The file is read as bytes so that json detects the UTF encoding
        # itself. The former ``encoding=`` keyword of json.load no longer
        # exists on Python 3.9+ and raised TypeError there.
        with open(path or self._CONFIG_FILE, "rb") as config:
            self.config = json.load(config)

    def boss_login(self):
        """Log in interactively and save the session cookies to disk.

        The image captcha is written to ``./img.jpg`` and must be typed in by
        the user, followed by the SMS code that the site sends to the
        configured phone number.

        Returns:
            False when requesting the SMS code fails (``rescode`` != 1),
            True once the login request has been sent and the cookies saved.
        """
        res = self.sessionid.get(self.config['url_login_page'])
        soup = BeautifulSoup(res.text, 'lxml')
        randomKey = soup.select_one('form input[name=randomKey]').attrs['value']
        # Download the captcha image referenced by the login form.
        captchaurl = soup.select_one('form img[class=verifyimg]').attrs['src']
        res = self.sessionid.get(self.config['domain'] + captchaurl)
        # Save the captcha image; the context manager closes the file even
        # when the write fails.
        with open("./img.jpg", "wb") as f:
            f.write(res.content)
        # ``start`` is a Windows shell command; elsewhere just report the path.
        if os.name == 'nt':
            os.system('start img.jpg')
        else:
            print('验证码图片已保存到 ./img.jpg')
        print('输入验证码:')
        captchacontent = input()
        print(captchacontent)
        logindata = {
            'pk': 'cpc_user_sign_up',
            'regionCode': '+86',
            'phone': self.config['iphone'],
            'captcha': captchacontent,
            'randomKey': randomKey,
            'phoneCode': '',
            'smsType': '1',
        }
        # Ask the site to send the SMS verification code.
        res = self.sessionid.post(self.config['url_sendsms'], data=logindata)
        if json.loads(res.text)['rescode'] != 1:
            print('短信校验码获取错误', res.text)
            return False
        print('请输入短信校验码:')
        # The login payload (phone number + SMS code) is deliberately not
        # echoed to the console.
        logindata['phoneCode'] = input()
        # Log in.
        res = self.sessionid.post(self.config['url_login'], data=logindata)
        print(res.text)
        # Persist the cookies so later runs can skip the login.
        self.sessionid.cookies.save()
        return True

    def readcookie(self):
        """Build request headers from the first cookie in the cookie file.

        Only the second line of the file is used (the first line is expected
        to be the LWP header); the text between ``": "`` and ``path=`` on
        that line becomes the ``cookie`` header value.

        Returns:
            dict with the keys ``cookie`` and ``user-agent``.

        Raises:
            ValueError: if the file has no second line or that line does not
                match the expected ``<key>: <cookie>path=...`` layout.
        """
        with open(self._COOKIE_FILE, "r") as ck:
            lines = ck.read().split('\n')
        if len(lines) < 2:
            raise ValueError('Incorrect cookie file: no cookie line found')
        line = lines[1].strip()
        match = re.search(r'(.*): (?P<t>.*)path=(.*)', line, re.U)
        if match is None:
            raise ValueError('Incorrect cookie line: {0}'.format(line))
        return {'cookie': match.group('t'), 'user-agent': self._USER_AGENT}

    def search(self, keywords='java 男 南京 本科 4年以上', page=2):
        """Run a candidate search and print the raw response body.

        Args:
            keywords: Free-text search keywords.
            page: Result page to request.
        """
        self.sessionid.cookies.load()
        # NOTE: an earlier, disabled variant scraped a token from the first
        # <script> of the search page; the saved cookie is replayed instead.
        self.sessionid.headers.update(self.readcookie())
        searchparam = {
            'page': page,
            'source': 1,
            'jobId': '',
            'jobs': '',
            'companies': '',
            'skills': '',
            'schools': '',
            'keywords': keywords,
            'salary': '',
            'experience': '',
            'degree': '',
            'gender': '',
            'school211': '',
            'returnees': '',
            'applyStatus': '',
        }
        res = self.sessionid.get(self.config['url_search'], params=searchparam)
        print(res.text)

    def recommend(self, page, jobid='946d106f73475c3c1XB73925GVs~',
                  degree='203', experience='106'):
        """Fetch one page of recommended candidates and greet the matches.

        A candidate is greeted when ``chatinfo`` reports gender ``'male'``,
        a false work-history flag and a graduation year not later than 2014.

        Args:
            page: Page number of the recommendation list.
            jobid: Identifier of the job the recommendations are for.
            degree: Degree filter code (default 203: bachelor, per the
                original author's note).
            experience: Experience filter code (default 106: 5-10 years, per
                the original author's note).
        """
        # The session headers (which carry the cookie) are not printed any
        # more, to keep credentials out of the console.
        self.sessionid.headers.update(self.readcookie())
        searchparam = {
            'page': page,
            'status': 0,
            'jobid': jobid,
            'degree': degree,
            'experience': experience,
        }
        res = self.sessionid.get('https://www.zhipin.com/boss/recommend/geeks.json',
                                 params=searchparam)
        soup = BeautifulSoup(res.text, 'lxml')
        body = soup.find('body')
        if body is None:
            # Nothing parseable came back; there is nobody to process.
            return
        geeks = body.find_all('li', attrs={'data-uid': re.compile('.*')})
        for geekinfo in geeks:
            # Only <li> elements without a ``data-expect`` attribute are handled.
            if geekinfo.get('data-expect') is not None:
                continue
            gender, isWorkCompay, dateofgraduation = self.chatinfo(self.sessionid, geekinfo)
            if gender == 'male' and not isWorkCompay and dateofgraduation <= '2014':
                self.batchAddRelatin(self.sessionid, geekinfo)

    @staticmethod
    def _trim_attr(value):
        """Drop the first two and last two characters of an attribute value.

        NOTE(review): presumably removes escaped quotes (``\\"``) that remain
        because the HTML is parsed straight out of a JSON payload; confirm.
        """
        return value[2:-2]

    @staticmethod
    def _latest_year(periods):
        """Return the largest four-digit year found in *periods*, or None."""
        years = re.findall(r'(?:19|20)\d{2}', ' '.join(periods))
        return max(years) if years else None

    def batchAddRelatin(self, sessionid, geekinfo):
        """Send a greeting to one candidate.

        Args:
            sessionid: The HTTP session used for the request.
            geekinfo: The candidate's ``<li>`` element from the
                recommendation list.
        """
        ahref = geekinfo.find('a')
        geekinfodict = {
            'gids': self._trim_attr(ahref.get('data-uid')),
            'expectIds': self._trim_attr(ahref.get('data-expect')),
            'lids': self._trim_attr(ahref.get('data-lid')),
            'jids': self._trim_attr(ahref.get('data-jid')),
            'suids': '',
        }
        print(geekinfodict)
        res = sessionid.post('https://www.zhipin.com/chat/batchAddRelation.json',
                             data=geekinfodict)
        print(res.text)

    def chatinfo(self, sessionid, geekinfo):
        """Fetch a candidate's resume and extract the filter attributes.

        Args:
            sessionid: The HTTP session used for the request.
            geekinfo: The candidate's ``<li>`` element from the
                recommendation list.

        Returns:
            Tuple ``(gender, isWorkCompay, dateofgraduation)``:
            ``gender`` is ``'male'`` when the first resume item contains the
            male icon, otherwise ``'female'``; ``isWorkCompay`` is True when
            the work-history section text contains ``公司名称`` (False when the
            section is absent); ``dateofgraduation`` is the latest four-digit
            year in the education periods, or ``'2018'`` when none is found.
        """
        ahref = geekinfo.find('a')
        em = geekinfo.find('em')
        chatinfodict = {
            'uid': self._trim_attr(em.get('data-eid')),
            'expectId': self._trim_attr(em.get('data-expect')),
            'lid': self._trim_attr(ahref.get('data-lid')),
            'jid': self._trim_attr(ahref.get('data-jid')),
            'ka': self._trim_attr(em.get('ka')),
            'GET': '',
        }
        res = sessionid.get('https://www.zhipin.com/chat/geek/info', params=chatinfodict)
        soup = BeautifulSoup(res.text, 'html.parser')
        resumeitems = soup.find_all('div', attrs={'class': 'resume-item'})

        # Gender comes from an icon in the first resume item; a resume with no
        # items falls back to 'female', so the caller's filter skips it.
        has_male_icon = bool(resumeitems) and bool(
            resumeitems[0].find('i', attrs={'class': 'fz fz-male'}))
        gender = 'male' if has_male_icon else 'female'

        # Start from "unknown" so a missing section cannot leave these
        # names unbound when the result tuple is built.
        isWorkCompay = None
        dateofgraduation = None
        for section in resumeitems[1:]:
            heading = section.find('h3', attrs={'class': 'title'})
            if heading is None:
                continue
            title = heading.text
            if title == '工作经历':
                isWorkCompay = '公司名称' in section.text
            elif title == '教育经历':
                periods = [span.text
                           for span in section.find_all('span', attrs={'class': 'period'})]
                dateofgraduation = self._latest_year(periods)

        return (gender,
                False if isWorkCompay is None else isWorkCompay,
                '2018' if dateofgraduation is None else dateofgraduation)
if __name__ == '__main__':
    # Script entry point: load the configuration, show the configured search
    # URL, then repeatedly walk the recommendation pages.
    client = Boss()
    client.get_config()
    print(client.config['url_search'])
    # The interactive login step (client.boss_login()) is left disabled here,
    # exactly as in the original script.
    rounds, last_page = 99, 49
    for _round in range(rounds):
        # Each round requests pages 1..49 of the recommendation list.
        for page in range(1, last_page + 1):
            client.recommend(page)
python爬取boss直聘源码
最新推荐文章于 2024-05-26 18:32:46 发布