在 牛客网--求职--笔试日历 中,可以查看每天有哪些公司安排了笔试,如下图。
一、需求
1、看看这个月有笔试的公司中,哪家你还没看过。
2、看看自己投过的公司哪家这个月有笔试及笔试的时间。
当然,你还是得把你投的记在excel中,把已笔试的也记下来!
二、代码
from selenium import webdriver
from collections import deque
import pandas as pd
# Module-level state filled in by first_extract():
#   index_jin -- index used to locate the "today" cell
#   my_month  -- the month shown on the calendar page
index_jin = my_month = None
def _strip_tokens(text, tokens):
    """Remove whole leading/trailing occurrences of each token from ``text``.

    ``str.strip(chars)`` treats its argument as a *set of characters*, so
    ``'xx科技'.strip('技术类')`` would eat the final '技'.  This helper only
    removes complete tokens and repeats until nothing more can be removed.
    """
    # Longest first, so '非技术类' is not half-eaten as '技术类'.
    ordered = sorted((t for t in tokens if t), key=len, reverse=True)
    changed = True
    while changed:
        changed = False
        for token in ordered:
            if text.startswith(token):
                text = text[len(token):]
                changed = True
            if text.endswith(token):
                text = text[:-len(token)]
                changed = True
    return text


def first_extract(url):
    """Scrape the exam calendar page and return its cleaned, non-empty cells.

    Opens ``url`` in Chrome through Selenium, reads the displayed current
    date and every ``<td>`` under the calendar component, and turns each
    non-empty cell into a single space-separated string prefixed with the
    displayed year-month.

    Side effects on module globals:

    * ``my_month`` is set to the part of the displayed date after the last
      ``'-'``.
    * ``index_jin`` is set to the position, *in the returned list*, of the
      cell whose text contains '今' (the "today" marker), so the caller can
      slice the result directly.

    :param url: address of the calendar page.
    :return: list of cleaned cell strings, in page order.
    """
    global index_jin
    global my_month
    import re
    from selenium.webdriver.common.by import By

    # Parenthesised notes (ASCII or full-width brackets) are dropped from the
    # cell text.  NOTE(review): the previous pattern '(.*?)' only matched
    # empty strings, so it never removed anything -- confirm that removing
    # bracketed notes is the intended behaviour.
    paren_note = re.compile(r'[（(].*?[）)]')

    options = webdriver.ChromeOptions()
    options.add_argument('--disable-gpu')
    options.add_argument('blink-settings=imagesEnabled=false')
    options.add_argument('disable-infobars')
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url=url)
        driver.maximize_window()

        # Displayed current date, e.g. '<year>-<month>'; the class name
        # really is spelled "current-mouth" in the XPath used here.
        date_element = driver.find_element(
            By.XPATH,
            '//div[@class="current-date"]/span[@class="current-mouth"]',
        )
        current_month = str(date_element.text)
        my_month = current_month.split('-')[-1]

        cells = driver.find_elements(
            By.XPATH, '//div[@id="jsCpn_9_component_0"]//td'
        )

        cleaned = []
        for cell in cells:
            text = cell.text  # read once instead of once per use
            if text == '':
                continue

            # Flatten the cell and drop the "+ 我要添加" control and the
            # category labels as whole tokens.
            content = str(text).replace('\n', ' ').strip(' +')
            content = _strip_tokens(content, ['我要添加']).strip(' +')
            content = _strip_tokens(content, ['非技术类', '技术类'])

            if '今' in content:
                # Index into ``cleaned`` (not into ``cells``): empty cells
                # are skipped above, so the raw <td> index would be shifted.
                index_jin = len(cleaned)
                content = content.strip('今 ')

            # Zero-pad single-character day numbers when prefixing the date.
            if len(content.split(' ')[0]) == 1:
                content = current_month + '-0' + content
            else:
                content = current_month + '-' + content

            content = paren_note.sub('', content)
            cleaned.append(content)
    finally:
        # Always release the browser, even if scraping fails half-way.
        driver.quit()
    return cleaned
def last_extract(my_list, start=None):
    """Group the exam slots of the kept calendar cells by company.

    Each entry is split on spaces: the first token is used as the date, and
    the remaining tokens are consumed in pairs ``(label, company)`` --
    presumably the label is the exam time.  Every pair contributes the
    string ``'<date>.<label>'`` to the list stored under ``company``.

    Empty tokens (from repeated or trailing spaces) are ignored so that they
    cannot shift the label/company pairing or create an empty-string key.

    :param my_list: cleaned cell strings, as returned by ``first_extract``.
    :param start: index of the first entry to keep.  When ``None`` (the
        default) the module-level ``index_jin`` is used, which drops the
        cells before "today"; if that is ``None`` too, nothing is dropped.
    :return: dict mapping company name to a list of ``'<date>.<label>'``.
    """
    if start is None:
        start = index_jin

    schedule = {}
    for entry in my_list[start:]:
        tokens = [tok for tok in entry.split(' ') if tok]
        if not tokens:
            continue
        the_date = tokens[0]
        # tokens[1], tokens[2] form the first pair, tokens[3], tokens[4] the
        # next, and so on; a dangling last label without a company is dropped.
        for label, name in zip(tokens[1::2], tokens[2::2]):
            schedule.setdefault(name, []).append(f'{the_date}.{label}')
    return schedule
# Scrape the calendar page once, then index the remaining exams by company.
mylist = first_extract('https://www.nowcoder.com/school/calendar')
mydict = last_extract(my_list=mylist)
def new(my_dict, excel_path=r'C:\Users\admin\Desktop\校招.xlsx'):
    """Print the companies with an exam that are not yet in the '已看过' column.

    A company from ``my_dict`` counts as already seen when, compared in upper
    case, its name contains a seen name or is contained in one.

    :param my_dict: mapping whose keys are company names (the values are not
        used here).
    :param excel_path: workbook holding the '已看过' column; defaults to the
        original hard-coded location.
    :return: None; the result is printed, using the module-level ``my_month``.
    """
    company = pd.read_excel(io=excel_path, header=0)

    # Drop blank cells first: converting NaN to str would yield 'NAN', which
    # substring-matches unrelated names (e.g. 'FINANCE').  Empty strings are
    # skipped too because '' is a substring of everything.
    seen = []
    for cell in company['已看过'].dropna():
        name = str(cell).strip().upper()
        if name:
            seen.append(name)

    # Build the result in one pass instead of removing from a list while
    # iterating over it, which skipped entries and could raise ValueError.
    unseen = []
    for key in my_dict:
        upper = key.upper()
        if any(s in upper or upper in s for s in seen):
            continue
        unseen.append(upper)

    print(f'!!!牛客网上{my_month}月的有笔试的公司中,你还没看过这些公司:!!!\n{unseen}')
def look_your_not_test(my_dict, excel_path=r'C:\Users\admin\Desktop\校招.xlsx'):
    """Print the exam schedule of companies applied to but not yet tested.

    Companies are read from the '已投' column, minus those that also appear
    in the '已笔试' column.  A key of ``my_dict`` is reported when one of the
    remaining company names is a substring of it.

    :param my_dict: mapping from company name to its list of exam slots.
    :param excel_path: workbook holding the '已投' and '已笔试' columns;
        defaults to the original hard-coded location.
    :return: None; the result is printed, using the module-level ``my_month``.
    """
    company = pd.read_excel(io=excel_path, header=0)

    # Blank cells are dropped before converting to str; otherwise they turn
    # into the literal 'nan' and take part in the substring matching below.
    applied = []
    for cell in company['已投'].dropna():
        name = str(cell).strip()
        if name:  # '' would be a substring of every company name
            applied.append(name)
    tested = {str(cell).strip() for cell in company['已笔试'].dropna()}

    # Applied to, but no written test taken yet.
    pending = [name for name in applied if name not in tested]

    last = []
    for name in pending:
        for key in my_dict:
            # Report each calendar entry once, even if several applied
            # names match it.
            if name in key and key not in last:
                last.append(key)

    print(f'{my_month}月你已投但未参加笔试的公司有相关笔试安排的公司如下:')
    print(last)
    print(f'你已投但未参加笔试的公司{my_month}月的笔试安排如下:')
    for key in last:
        print(f'{key}:{my_dict[key]}')
# Report 1: companies with an exam that are not yet marked as seen.
new(my_dict=mydict)
# Report 2: exam schedule for companies applied to but not yet tested.
look_your_not_test(my_dict=mydict)
三、结果
selenium自动化的过程图上传失败了,输出凑合看看吧。