技术栈:Django、MySQL、Selenium
项目介绍:后端采用 Django 框架,使用 Python 语言开发;爬虫部分采用 Selenium 自动化工具
新增功能:在线爬虫(视频中没有)
项目结构:
部分代码演示:
# Extract the fields of a single job card (`job`, a Selenium element) into
# local variables. `job`, `self`, `By` and `json` come from the surrounding
# code, which is not part of this excerpt.

# title: job title
title = job.find_element(
    by=By.XPATH,
    value='.//a[@class="job-card-left"]/div[contains(@class,"job-title")]/span[@class="job-name"]',
).text

# address: the area text is "·"-separated; the first part is the address
addresses = job.find_element(
    by=By.XPATH,
    value='.//a[@class="job-card-left"]/div[contains(@class,"job-title")]/span[@class="job-area-wrapper"]/span',
).text.split("·")
address = addresses[0]

# dist: administrative district (second part of the area text, if present)
if len(addresses) != 1:
    dist = addresses[1]
else:
    dist = ''

# type: taken from the enclosing object.
# NOTE(review): this name shadows the builtin `type`; it is kept because code
# outside this excerpt presumably reads it.
type = self.type

tag_list = job.find_elements(
    by=By.XPATH,
    value='.//a[@class="job-card-left"]/div[contains(@class,"job-info")]/ul[@class="tag-list"]/li',
)
if len(tag_list) == 2:
    # Two tags: [workExperience, educational]
    educational = tag_list[1].text
    workExperience = tag_list[0].text
else:
    # Otherwise the same two fields are read one position later.
    # NOTE(review): raises IndexError when fewer than 3 tags are present.
    educational = tag_list[2].text
    workExperience = tag_list[1].text

# hrName: HR contact name
# NOTE(review): the text of this div presumably also contains the nested <em>
# text that is read separately as hrWork below - confirm this is intended.
hrName = job.find_element(
    by=By.XPATH,
    value='.//a[@class="job-card-left"]/div[contains(@class,"job-info")]/div[@class="info-public"]',
).text

# hrWork: HR contact's position
hrWork = job.find_element(
    by=By.XPATH,
    value='.//a[@class="job-card-left"]/div[contains(@class,"job-info")]/div[@class="info-public"]/em',
).text

# workTag: job tags, stored as a JSON array of strings
workTag = job.find_elements(
    by=By.XPATH,
    value='./div[contains(@class,"job-card-footer")]/ul[@class="tag-list"]/li',
)
workTag = json.dumps([tag.text for tag in workTag])

# pratice: internship flag; switched to 1 below when the salary is a daily wage
pratice = 0

salaries = job.find_element(
    by=By.XPATH,
    value='.//a[@class="job-card-left"]/div[contains(@class,"job-info")]/span[@class="salary"]',
).text
if 'K' in salaries:
    # Salary quoted in thousands, optionally followed by "·<months>".
    salaries = salaries.split('·')
    # salary: numeric bounds, converted from K to units
    salary = [int(x) * 1000 for x in salaries[0].replace('K', '').split('-')]
    # salaryMonth: the part after "·", or '0薪' when there is none
    if len(salaries) == 1:
        salaryMonth = '0薪'
    else:
        salaryMonth = salaries[1]
else:
    # Daily wage ("元/天"). Parse the whole string: `salaries` is still a str
    # here, so indexing it with [0] would yield only its first character.
    salary = [int(x) for x in salaries.replace('元/天', '').split('-')]
    salaryMonth = '0薪'
    pratice = 1

# 2023.3.10
# companyTitle: company name
companyTitle = job.find_element(
    by=By.XPATH,
    value='.//div[@class="job-card-right"]/div[@class="company-info"]/h3/a',
).text

# companyAvatar: company logo URL
companyAvatar = job.find_element(
    by=By.XPATH,
    value='.//div[@class="job-card-right"]/div[@class="company-logo"]/a/img',
).get_attribute('src')

companyInfos = job.find_elements(
    by=By.XPATH,
    value='.//div[@class="job-card-right"]/div[@class="company-info"]/ul[@class="company-tag-list"]/li',
)
if len(companyInfos) == 3:
    # Three tags: [companyNature, companyStatus, headcount]
    companyNature = companyInfos[0].text
    companyStatus = companyInfos[1].text
    companyPeoples = companyInfos[2].text
else:
    # Otherwise: [companyNature, headcount]; the status defaults to "未融资".
    companyNature = companyInfos[0].text
    companyStatus = "未融资"
    companyPeoples = companyInfos[1].text

# companyPeople: headcount range as a list of ints; the open-ended
# "10000人以上" bucket is stored as [0, 10000].
if companyPeoples != "10000人以上":
    companyPeople = [int(x) for x in companyPeoples.replace("人", "").split("-")]
else:
    companyPeople = [0, 10000]

# companyTags: description text split on the full-width comma and stored as a
# JSON array, or '无' when the text is empty
companyTags = job.find_element(
    by=By.XPATH,
    value='./div[contains(@class,"job-card-footer")]/div[@class="info-desc"]',
).text
if not companyTags:
    companyTags = '无'
else:
    companyTags = json.dumps(companyTags.split(','))

# detailUrl: link to the job detail page
detailUrl = job.find_element(
    by=By.XPATH,
    value='.//a[@class="job-card-left"]',
).get_attribute('href')

# companyUrl: link to the company page
companyUrl = job.find_element(
    by=By.XPATH,
    value='.//div[@class="job-card-right"]/div[@class="company-info"]/h3/a',
).get_attribute('href')
下载地址
链接:https://pan.baidu.com/s/1APbf00e2bx3NMPtO-_wI8g
提取码:y7lb
项目演示全过程视频:
【新模板全过程】[计算机毕业设计]-基于Python的招聘数据可视化系统-Django框架-一键爬虫