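# Note: this script can fetch any job position in the current city, as well as any job position in the popular ("hot") cities.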
from selenium import webdriver
# Scrape 51job.com search results with Selenium: ask for a job keyword and a
# city, run the search, then print every result row of every result page as
# "position|company|money|date".

# The page-count computation assumes 50 result rows per page.
RESULTS_PER_PAGE = 50
TOTAL_COUNT_SELECTOR = '#resultList > div.dw_tlc > div:nth-child(4)'
RESULT_ROW_SELECTOR = 'body div.dw_wp div#resultList.dw_table div.el'
NEXT_PAGE_SELECTOR = '#resultList div.dw_page div div div ul li:last-child a'
PAGE_BANNER = '------------------------------%s----------------------------'


def _parse_total(text):
    """Return the integer that sits between the first character and '条'.

    Raises:
        ValueError: if the '条' marker is missing or the text before it
            (minus its first character) is not an integer.
    """
    head, marker, _ = text.partition('条')
    if not marker:
        # str.find() would return -1 here and silently chop the last
        # character off the slice, so fail loudly instead.
        raise ValueError('cannot find the job total in %r' % text)
    # The number starts at index 1; the first character is a prefix
    # (presumably a label such as "共" - confirm against the live page).
    return int(head[1:])


def _page_count(total, per_page=RESULTS_PER_PAGE):
    """Return how many pages are needed to show *total* rows (ceiling)."""
    return (total + per_page - 1) // per_page


def _format_row(text):
    """Turn the newline-separated text of one result row into a '|' line.

    Lines 0 and 1 are printed as position and company; line 3 is printed
    before line 2 (the script labels them money and date respectively).
    Rows with fewer than four lines are padded with empty fields instead of
    raising IndexError.
    """
    fields = text.split('\n')
    fields += [''] * (4 - len(fields))
    position, company, date, money = fields[:4]
    return '%s|%s|%s|%s' % (position, company, money, date)


def _print_page(browser, page_no):
    """Print the banner for *page_no* and every result row currently shown."""
    rows = browser.find_elements_by_css_selector(RESULT_ROW_SELECTOR)
    print(PAGE_BANNER % page_no)
    # The first matched element is skipped (presumably the table header).
    for row in rows[1:]:
        print(_format_row(row.text))


needSearchposition = input('输入你想要搜索的职位:')
needSearchcity = input('输入你想要选择的城市:')
driver = webdriver.Chrome(r'E:\Python3.6\Scripts\chromedriver.exe')
try:
    driver.get('http://51job.com')
    driver.implicitly_wait(3)

    # Type the keyword and open the city picker.
    driver.find_element_by_id('kwdselectid').send_keys(needSearchposition)
    driver.find_element_by_id('work_position_input').click()

    # Walk every city in the picker: tick the requested city if it is not
    # ticked yet, and untick any other city that is currently ticked.
    for city in driver.find_elements_by_css_selector('#work_position_click_center_right em'):
        wanted = city.text == needSearchcity
        ticked = city.get_attribute('class') == 'on'
        if wanted != ticked:
            city.click()

    # Save the city selection, then run the search.
    driver.find_element_by_id('work_position_click_bottom_save').click()
    driver.find_element_by_css_selector('.ush button').click()

    # Read the total number of jobs and derive the number of result pages.
    total_text = driver.find_element_by_css_selector(TOTAL_COUNT_SELECTOR).text
    page_total = _page_count(_parse_total(total_text))

    _print_page(driver, 1)
    for page_no in range(2, page_total + 1):
        # The last pagination item links to the next page; its href is
        # already the complete URL, so navigate to it as-is.
        next_url = driver.find_element_by_css_selector(NEXT_PAGE_SELECTOR).get_attribute('href')
        driver.get(next_url)
        _print_page(driver, page_no)
finally:
    # Always shut the browser down, even when a lookup or parse step fails.
    driver.quit()