Preface
💖💖Author: 计算机程序员小杨
💙💙About me: I work in the computer field and am experienced in Java, WeChat Mini Programs, Python, Golang, Android, and several other IT directions. I take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know a few techniques for reducing similarity scores. I love technology, enjoy digging into new tools and frameworks, and like solving real problems with code. Feel free to ask me anything about code or technology!
💛💛A quick note: thank you all for following and supporting me!
💕💕To get the source code, contact 计算机程序员小杨 (see the end of this post)
💜💜
Practical website projects
Practical Android / Mini Program projects
Practical big data projects
Practical deep learning projects
Topic ideas for computer science graduation projects
💜💜
1. Development Tools
Big data framework: Hadoop + Spark (Hive is not used in this build; customization is supported)
Development language: Python + Java (both versions are available)
Backend framework: Django + Spring Boot (Spring + SpringMVC + MyBatis) (both versions are available)
Frontend: Vue + ElementUI + Echarts + HTML + CSS + JavaScript + jQuery
Key technologies: Hadoop, HDFS, Spark, Spark SQL, Pandas, NumPy
Database: MySQL
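The stack above pairs Spark SQL for distributed aggregation with Pandas and NumPy for shaping results on the driver. As a minimal sketch of that handoff (the table and column names here are made up for illustration, not taken from the project):

# Minimal sketch: aggregate with Spark SQL, then hand off to Pandas for chart-ready JSON.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("StackDemo").getOrCreate()
jobs = spark.createDataFrame(
    [("Beijing", 15000), ("Shanghai", 18000), ("Beijing", 20000)],
    ["city", "salary"])
jobs.createOrReplaceTempView("jobs")
# Spark SQL handles the distributed aggregation...
city_avg = spark.sql("SELECT city, AVG(salary) AS avg_salary FROM jobs GROUP BY city")
# ...and toPandas() brings the small result set to the driver as Echarts-friendly records.
chart_data = city_avg.toPandas().to_dict(orient="records")
print(chart_data)  # e.g. [{'city': 'Beijing', 'avg_salary': 17500.0}, ...]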
2. System Overview
The Job Posting Data Analysis System (《招聘岗位数据分析系统》) is a human-resources analysis platform built on big data technology. Its core architecture is the Hadoop + Spark framework, and it is developed in Python: the backend exposes RESTful API services through Django, while the frontend builds its interactive interface with Vue.js, the ElementUI component library, and the Echarts visualization library. Massive volumes of recruitment data are stored in the HDFS distributed file system; Spark and Spark SQL carry out the heavy data processing and analytical computation, with data-science libraries such as Pandas and NumPy supporting the more complex statistical work. The platform provides core modules for user management, job-posting data management, a large-screen visualization dashboard, skill-demand analysis, salary-level analysis, position-demand analysis, and company-benefits analysis. Structured results are stored in a MySQL database, and the system supports multi-dimensional mining and analysis of recruitment-market data, giving corporate HR departments, job seekers, and research institutions data-driven decision support: a clearer picture of current employment trends, salary distributions, shifting skill demand, and industry dynamics.
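As a concrete illustration of the storage path described above (raw postings on HDFS, structured rows in MySQL), here is a minimal ingestion sketch. The HDFS path, database URL, and credentials are placeholders rather than the project's real configuration:

# Minimal sketch of the HDFS -> Spark -> MySQL pipeline described above.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("JobETL").getOrCreate()
# Raw scraped postings stored on HDFS as CSV (path is a placeholder).
raw = spark.read.option("header", "true").csv("hdfs://namenode:9000/data/job_postings/")
# Basic cleaning before the rows become the structured job_positions table.
cleaned = raw.dropDuplicates(["job_id"]).filter(raw.position_name.isNotNull())
# Write the structured result into MySQL, where the Django views query it.
(cleaned.write.format("jdbc")
    .option("url", "jdbc:mysql://localhost:3306/job_analysis")
    .option("dbtable", "job_positions")
    .option("user", "root").option("password", "password")
    .mode("append").save())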
3. System Feature Demo
[Big Data] Job Posting Data Analysis System | Computer Science Graduation Project | Hadoop + Spark Environment Setup | Data Science and Big Data Technology | With Source Code + Documentation + Walkthrough
4. System UI Screenshots
5. Source Code Highlights
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
import numpy as np
from datetime import datetime, timedelta

# One shared SparkSession for all analysis views, with adaptive query execution enabled.
spark = (SparkSession.builder.appName("JobAnalysisSystem")
         .config("spark.sql.adaptive.enabled", "true")
         .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
         .getOrCreate())

def load_job_positions():
    # Read the job_positions table from MySQL over JDBC (credentials are placeholders).
    return (spark.read.format("jdbc")
            .option("url", "jdbc:mysql://localhost:3306/job_analysis")
            .option("dbtable", "job_positions")
            .option("user", "root").option("password", "password").load())
@csrf_exempt
def analyze_salary_distribution(request):
    # Salary statistics by city and position, plus an overall salary histogram.
    if request.method != 'POST':
        return JsonResponse({"error": "POST required"}, status=405)
    data = json.loads(request.body)
    city_filter = data.get('city')
    position_filter = data.get('position')
    experience_filter = data.get('experience')
    df = load_job_positions()
    if city_filter:
        df = df.filter(col("city") == city_filter)
    if position_filter:
        df = df.filter(col("position_name").contains(position_filter))
    if experience_filter:
        df = df.filter(col("experience_required") == experience_filter)
    # Keep rows with a complete salary range and use the midpoint as the salary estimate.
    salary_stats = df.select("salary_min", "salary_max", "city", "position_name") \
        .filter(col("salary_min").isNotNull() & col("salary_max").isNotNull())
    salary_avg = salary_stats.withColumn("avg_salary", (col("salary_min") + col("salary_max")) / 2)
    # Approximate quartiles and the 90th percentile with 1% relative error.
    percentiles = salary_avg.approxQuantile("avg_salary", [0.25, 0.5, 0.75, 0.9], 0.01)
    city_salary_avg = salary_avg.groupBy("city") \
        .agg(avg("avg_salary").alias("city_avg_salary"), count("*").alias("job_count")) \
        .orderBy(desc("city_avg_salary"))
    position_salary_avg = salary_avg.groupBy("position_name") \
        .agg(avg("avg_salary").alias("position_avg_salary"), count("*").alias("job_count")) \
        .orderBy(desc("position_avg_salary")).limit(20)
    # Collect the salary column to the driver and bucket it into a 10-bin histogram.
    salary_values = [float(r[0]) for r in salary_avg.select("avg_salary").collect()]
    hist_counts, bin_edges = np.histogram(salary_values, bins=10)
    result = {
        "percentiles": {"p25": percentiles[0], "p50": percentiles[1], "p75": percentiles[2], "p90": percentiles[3]},
        # Spark Row objects are not JSON-serializable, so convert each to a plain dict.
        "city_analysis": [r.asDict() for r in city_salary_avg.collect()],
        "position_analysis": [r.asDict() for r in position_salary_avg.collect()],
        "distribution": {"bins": bin_edges.tolist(), "counts": hist_counts.tolist()},
        "total_jobs": salary_avg.count()
    }
    return JsonResponse(result)
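For reference, one way to exercise the endpoint above from a test script. This is only a sketch: the /api/salary/ path is an assumed route (the URL wiring is shown after the last view), and the server is assumed to be running locally on port 8000.

import requests  # hypothetical client-side check; not part of the system itself

# POST the same JSON body the view expects.
resp = requests.post("http://localhost:8000/api/salary/",
                     json={"city": "Beijing", "position": "数据分析"})
print(resp.json()["percentiles"])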
@csrf_exempt
def analyze_skill_demand(request):
    # Skill demand counts, weekly trends, and skill/experience breakdowns over a recent window.
    if request.method != 'POST':
        return JsonResponse({"error": "POST required"}, status=405)
    data = json.loads(request.body)
    time_range = data.get('time_range', 30)  # window size in days
    industry_filter = data.get('industry')
    end_date = datetime.now()
    start_date = end_date - timedelta(days=time_range)
    df = load_job_positions()
    df_filtered = df.filter((col("publish_date") >= start_date.strftime('%Y-%m-%d')) &
                            (col("publish_date") <= end_date.strftime('%Y-%m-%d')))
    if industry_filter:
        df_filtered = df_filtered.filter(col("industry") == industry_filter)
    # required_skills is a comma-separated string; explode it into one row per (job, skill).
    # publish_date is carried through because the weekly trend below needs it.
    skills_df = df_filtered.select("job_id", "required_skills", "position_name", "salary_min",
                                   "salary_max", "experience_required", "publish_date") \
        .filter(col("required_skills").isNotNull())
    skills_exploded = skills_df.select("job_id", "position_name", "salary_min", "salary_max",
                                       "experience_required", "publish_date",
                                       explode(split(col("required_skills"), ",")).alias("skill"))
    skills_cleaned = skills_exploded.withColumn("skill", trim(lower(col("skill")))).filter(col("skill") != "")
    skill_counts = skills_cleaned.groupBy("skill").agg(
        count("*").alias("demand_count"),
        countDistinct("job_id").alias("unique_jobs"),
        avg((col("salary_min") + col("salary_max")) / 2).alias("avg_salary")
    ).orderBy(desc("demand_count"))
    top_skills = skill_counts.limit(50).collect()
    experience_skill_analysis = skills_cleaned.groupBy("experience_required", "skill") \
        .agg(count("*").alias("count")).orderBy("experience_required", desc("count"))
    position_skill_matrix = skills_cleaned.groupBy("position_name").agg(
        collect_list("skill").alias("skills_list"), count("*").alias("total_jobs")
    ).filter(col("total_jobs") >= 5)
    # Spark 3 rejects week-based patterns such as "yyyy-ww" in date_format,
    # so build the week key from year() and weekofyear() instead.
    skill_trend_data = skills_cleaned.withColumn(
        "week", concat_ws("-", year(to_date(col("publish_date"))), weekofyear(to_date(col("publish_date"))))
    ).groupBy("week", "skill").agg(count("*").alias("weekly_count")).orderBy("week", "skill")
    # "Emerging" skills here are well-paid skills with at least 10 postings in the window.
    emerging_skills = skill_counts.filter(col("demand_count") >= 10).orderBy(desc("avg_salary")).limit(20).collect()
    result = {
        "top_skills": [{"skill": r["skill"], "demand_count": r["demand_count"],
                        "unique_jobs": r["unique_jobs"], "avg_salary": r["avg_salary"]} for r in top_skills],
        "experience_analysis": [r.asDict() for r in experience_skill_analysis.collect()],
        "position_skills": [r.asDict() for r in position_skill_matrix.collect()],
        "skill_trends": [r.asDict() for r in skill_trend_data.collect()],
        "emerging_skills": [{"skill": r["skill"], "avg_salary": r["avg_salary"],
                             "demand_count": r["demand_count"]} for r in emerging_skills]
    }
    return JsonResponse(result)
@csrf_exempt
def analyze_company_benefits(request):
    # Benefit popularity, its relation to salary, and industry/company-size breakdowns.
    if request.method != 'POST':
        return JsonResponse({"error": "POST required"}, status=405)
    data = json.loads(request.body)
    company_size_filter = data.get('company_size')
    industry_filter = data.get('industry')
    df = load_job_positions()
    if company_size_filter:
        df = df.filter(col("company_size") == company_size_filter)
    if industry_filter:
        df = df.filter(col("industry") == industry_filter)
    benefits_df = df.select("company_name", "company_size", "industry", "benefits",
                            "salary_min", "salary_max", "position_name").filter(col("benefits").isNotNull())
    # benefits is a comma-separated string; explode it into one row per (posting, benefit).
    benefits_exploded = benefits_df.select("company_name", "company_size", "industry", "salary_min",
                                           "salary_max", "position_name",
                                           explode(split(col("benefits"), ",")).alias("benefit"))
    benefits_cleaned = benefits_exploded.withColumn("benefit", trim(lower(col("benefit")))).filter(col("benefit") != "")
    benefit_popularity = benefits_cleaned.groupBy("benefit").agg(
        count("*").alias("frequency"),
        countDistinct("company_name").alias("company_count"),
        avg((col("salary_min") + col("salary_max")) / 2).alias("avg_salary_with_benefit")
    ).orderBy(desc("frequency"))
    industry_benefits = benefits_cleaned.groupBy("industry", "benefit") \
        .agg(count("*").alias("count")).orderBy("industry", desc("count"))
    company_size_benefits = benefits_cleaned.groupBy("company_size", "benefit").agg(
        count("*").alias("count"),
        avg((col("salary_min") + col("salary_max")) / 2).alias("avg_salary")
    ).orderBy("company_size", desc("count"))
    # Benefits whose postings average above 15,000 are treated as "premium" perks.
    premium_benefits = benefit_popularity.filter(col("avg_salary_with_benefit") > 15000) \
        .orderBy(desc("avg_salary_with_benefit")).limit(15)
    # Per-company benefit lists (kept for co-occurrence analysis; not in the JSON response below).
    benefit_correlation = benefits_cleaned.groupBy("company_name").agg(
        collect_list("benefit").alias("benefits_list"),
        avg((col("salary_min") + col("salary_max")) / 2).alias("company_avg_salary"))
    # Non-equi (contains) join: costly, but scores each benefit by the salaries of postings listing it.
    bp = benefit_popularity.select("benefit", "avg_salary_with_benefit")
    benefit_salary_impact = benefits_df.join(bp, benefits_df.benefits.contains(bp.benefit), "left") \
        .groupBy("benefit").agg(avg("avg_salary_with_benefit").alias("impact_score")) \
        .orderBy(desc("impact_score"))
    top_benefit_companies = benefits_cleaned.groupBy("company_name").agg(
        countDistinct("benefit").alias("benefit_variety"),
        avg((col("salary_min") + col("salary_max")) / 2).alias("avg_salary")
    ).filter(col("benefit_variety") >= 5).orderBy(desc("benefit_variety")).limit(20)
    result = {
        "benefit_rankings": [{"benefit": r["benefit"], "frequency": r["frequency"],
                              "company_count": r["company_count"], "avg_salary": r["avg_salary_with_benefit"]}
                             for r in benefit_popularity.limit(30).collect()],
        "industry_analysis": [r.asDict() for r in industry_benefits.collect()],
        "company_size_analysis": [r.asDict() for r in company_size_benefits.collect()],
        "premium_benefits": [{"benefit": r["benefit"], "avg_salary": r["avg_salary_with_benefit"],
                              "frequency": r["frequency"]} for r in premium_benefits.collect()],
        "salary_impact": [r.asDict() for r in benefit_salary_impact.collect()],
        "top_companies": [{"company": r["company_name"], "benefit_count": r["benefit_variety"],
                           "avg_salary": r["avg_salary"]} for r in top_benefit_companies.collect()]
    }
    return JsonResponse(result)
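Finally, a minimal sketch of how the three views above could be registered with Django's URL router. The module layout and URL paths are assumptions for illustration, not the project's actual configuration:

# urls.py -- hypothetical routing for the three analysis views above.
from django.urls import path
from . import views

urlpatterns = [
    path('api/salary/', views.analyze_salary_distribution),
    path('api/skills/', views.analyze_skill_demand),
    path('api/benefits/', views.analyze_company_benefits),
]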
6. Project Documentation
The End
💕💕To get the source code, contact 计算机程序员小杨 (see the end of this post)