💖💖 Author: 计算机毕业设计杰瑞
💙💙 About me: I have long worked in computer science training and genuinely enjoy teaching. My strongest languages and platforms are Java, WeChat Mini Programs, Python, Golang, and Android, and my project work covers big data, deep learning, websites, mini programs, Android apps, and algorithms. I regularly take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know a few techniques for reducing plagiarism-check similarity. I enjoy sharing solutions to problems I run into during development and discussing technology, so feel free to ask me anything about code!
💛💛 A few words: thank you all for your attention and support!
💜💜
Website practical projects
Android / Mini Program practical projects
Big data practical projects
Deep learning practical projects
Recommended topics for computer science graduation projects
Table of Contents
Introduction
Demo Video
Demo Screenshots
Code Showcase
Documentation Preview
Introduction to the Dry Bean Data Visualization and Analysis System Based on Big Data
This system is a dry bean data visualization and analysis platform built on big data technologies. It uses the Hadoop Distributed File System (HDFS) as the underlying storage layer and the Spark framework for efficient processing and analysis of large volumes of dry bean data. The backend is built on Django, and the frontend uses the Vue + ElementUI + Echarts stack to provide an interactive visualization interface. Core functionality covers dry bean data management, multi-dimensional comprehensive ranking analysis, data quality distribution analysis, core feature distribution analysis, geometric shape feature analysis, overall shape quality analysis, and production variety characteristic analysis. Data cleaning, transformation, and aggregation are performed with Spark SQL and Pandas, statistical computation with NumPy, and the results are rendered as Echarts charts. The system supports in-depth mining of multi-dimensional dry bean attributes such as appearance features, quality indicators, and shape parameters, and can quickly process dry bean inspection data at the TB scale, providing data support for agricultural product quality assessment, variety selection, and production decision-making. It implements the complete big data pipeline from data collection and storage through computation to visualization, and offers good extensibility and practical value.
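As a hedged illustration of the Spark SQL / Pandas / NumPy processing path described above, the minimal sketch below (not taken from the project source) reuses the tb_drybean_data table, column names, and connection settings that appear in the code section further down:

from pyspark.sql import SparkSession
import numpy as np

spark = SparkSession.builder.appName("DryBeanPandasSketch").getOrCreate()

# Load the dry bean table through Spark (same connection settings as the code section below).
df = (spark.read.format("jdbc")
      .option("url", "jdbc:mysql://localhost:3306/drybean_db")
      .option("driver", "com.mysql.cj.jdbc.Driver")
      .option("dbtable", "tb_drybean_data")
      .option("user", "root").option("password", "123456")
      .load())

# Project only the needed columns, pull them to the driver as a Pandas DataFrame,
# then finish the statistics with Pandas and NumPy.
pdf = df.select("bean_type", "area", "roundness").dropna().toPandas()
print(pdf.groupby("bean_type")["area"].mean())
print("roundness 90th percentile:", np.percentile(pdf["roundness"].to_numpy(), 90))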
Demo Video of the Dry Bean Data Visualization and Analysis System Based on Big Data
[Data Analysis] Dry Bean Data Visualization and Analysis System Based on Big Data | Big data graduation project in practice, visualization dashboard, topic recommendation, documentation guidance, deployment and operation | Hadoop, Spark, Java
Demo Screenshots of the Dry Bean Data Visualization and Analysis System Based on Big Data
Code Showcase of the Dry Bean Data Visualization and Analysis System Based on Big Data
from pyspark.sql import SparkSession
# Alias Spark's min/max/round so they do not shadow the Python builtins used below.
from pyspark.sql.functions import (col, avg, count, stddev, dense_rank, row_number, percentile_approx,
                                   min as spark_min, max as spark_max, round as spark_round)
from pyspark.sql.window import Window
from django.http import JsonResponse
from django.views import View
import json

# Shared SparkSession: HDFS-backed warehouse directory, modest executor/driver memory.
spark = (SparkSession.builder.appName("DryBeanAnalysis")
         .config("spark.sql.warehouse.dir", "hdfs://localhost:9000/user/hive/warehouse")
         .config("spark.executor.memory", "4g").config("spark.driver.memory", "2g")
         .getOrCreate())

class MultiDimensionalRankingAnalysis(View):
    def post(self, request):
        params = json.loads(request.body)
        ranking_type = params.get('ranking_type', 'comprehensive')
        limit_num = params.get('limit', 100)
        df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/drybean_db").option("driver", "com.mysql.cj.jdbc.Driver").option("dbtable", "tb_drybean_data").option("user", "root").option("password", "123456").load()
        df_cleaned = df.na.drop(subset=["area", "perimeter", "major_axis", "minor_axis", "aspect_ratio", "eccentricity", "convex_area", "equiv_diameter", "extent", "solidity", "roundness", "compactness", "shape_factor1", "shape_factor2", "shape_factor3", "shape_factor4"])
        if ranking_type == 'comprehensive':
            # Min-max normalize the four indicators; compute all bounds in a single Spark job.
            bounds = df_cleaned.agg(
                spark_min("area").alias("area_min"), spark_max("area").alias("area_max"),
                spark_min("roundness").alias("roundness_min"), spark_max("roundness").alias("roundness_max"),
                spark_min("compactness").alias("compactness_min"), spark_max("compactness").alias("compactness_max"),
                spark_min("solidity").alias("solidity_min"), spark_max("solidity").alias("solidity_max")).collect()[0]
            df_normalized = df_cleaned
            for field in ("area", "roundness", "compactness", "solidity"):
                low, high = bounds[f"{field}_min"], bounds[f"{field}_max"]
                df_normalized = df_normalized.withColumn(f"{field}_norm", (col(field) - low) / (high - low))
            # Weighted comprehensive score on a 0-100 scale.
            df_scored = df_normalized.withColumn("comprehensive_score", spark_round((col("area_norm") * 0.25 + col("roundness_norm") * 0.3 + col("compactness_norm") * 0.25 + col("solidity_norm") * 0.2) * 100, 2))
            window_spec = Window.orderBy(col("comprehensive_score").desc())
            df_ranked = df_scored.withColumn("ranking", dense_rank().over(window_spec))
            result_df = df_ranked.select("id", "bean_type", "area", "roundness", "compactness", "solidity", "comprehensive_score", "ranking").limit(limit_num)
        elif ranking_type == 'quality':
            # Quality score as a weighted sum of roundness, compactness and solidity.
            df_quality = df_cleaned.withColumn("quality_score", spark_round((col("roundness") * 40 + col("compactness") * 35 + col("solidity") * 25), 2))
            window_spec = Window.orderBy(col("quality_score").desc())
            df_ranked = df_quality.withColumn("ranking", row_number().over(window_spec))
            result_df = df_ranked.select("id", "bean_type", "roundness", "compactness", "solidity", "quality_score", "ranking").limit(limit_num)
        elif ranking_type == 'shape':
            # Shape score built from the geometric form factors.
            df_shape = df_cleaned.withColumn("shape_score", spark_round((col("aspect_ratio") * 0.3 + col("eccentricity") * 0.25 + col("extent") * 0.25 + col("shape_factor1") * 0.2) * 100, 2))
            window_spec = Window.orderBy(col("shape_score").desc())
            df_ranked = df_shape.withColumn("ranking", dense_rank().over(window_spec))
            result_df = df_ranked.select("id", "bean_type", "aspect_ratio", "eccentricity", "extent", "shape_factor1", "shape_score", "ranking").limit(limit_num)
        else:
            return JsonResponse({"code": 400, "message": "Unsupported ranking_type", "data": []})
        result_list = result_df.collect()
        ranking_data = [row.asDict() for row in result_list]
        return JsonResponse({"code": 200, "message": "Multi-dimensional ranking analysis completed", "data": ranking_data})
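
# Hypothetical usage sketch, not part of the original project code: once the view above is
# wired to a URL (the path below is an assumption for illustration only), a client can POST
# the parameters the view reads and receive the ranked records as JSON, e.g.:
#     import requests
#     payload = {"ranking_type": "quality", "limit": 50}
#     resp = requests.post("http://localhost:8000/api/analysis/ranking/", json=payload)
#     print(resp.json()["data"][:3])
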
class DataQualityDistributionAnalysis(View):
    def post(self, request):
        params = json.loads(request.body)
        quality_indicator = params.get('indicator', 'roundness')
        interval_num = params.get('interval', 10)
        df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/drybean_db").option("driver", "com.mysql.cj.jdbc.Driver").option("dbtable", "tb_drybean_data").option("user", "root").option("password", "123456").load()
        df_valid = df.filter(col(quality_indicator).isNotNull())
        stats = df_valid.agg(spark_min(quality_indicator).alias("min_val"), spark_max(quality_indicator).alias("max_val")).collect()[0]
        min_value = float(stats["min_val"])
        max_value = float(stats["max_val"])
        interval_width = (max_value - min_value) / interval_num
        distribution_data = []
        # Histogram: split the value range into interval_num equal-width bins.
        for i in range(interval_num):
            lower_bound = min_value + i * interval_width
            upper_bound = min_value + (i + 1) * interval_width
            if i == interval_num - 1:
                # The last bin is closed on the right so the maximum value is included.
                interval_df = df_valid.filter((col(quality_indicator) >= lower_bound) & (col(quality_indicator) <= upper_bound))
            else:
                interval_df = df_valid.filter((col(quality_indicator) >= lower_bound) & (col(quality_indicator) < upper_bound))
            interval_count = interval_df.count()
            interval_avg = interval_df.agg(avg(quality_indicator)).collect()[0][0]
            interval_label = f"[{lower_bound:.2f}, {upper_bound:.2f})" if i < interval_num - 1 else f"[{lower_bound:.2f}, {upper_bound:.2f}]"
            distribution_data.append({"interval": interval_label, "count": interval_count, "avg_value": round(float(interval_avg) if interval_avg else 0, 4), "lower_bound": round(lower_bound, 2), "upper_bound": round(upper_bound, 2)})
        total_count = df_valid.count()
        for item in distribution_data:
            item["percentage"] = round((item["count"] / total_count) * 100, 2) if total_count > 0 else 0
        quality_stats = df_valid.agg(avg(quality_indicator).alias("mean"), stddev(quality_indicator).alias("std"), percentile_approx(quality_indicator, 0.25).alias("q1"), percentile_approx(quality_indicator, 0.5).alias("median"), percentile_approx(quality_indicator, 0.75).alias("q3")).collect()[0]
        statistics_summary = {"mean": round(float(quality_stats["mean"]), 4), "std": round(float(quality_stats["std"]), 4) if quality_stats["std"] is not None else 0, "q1": round(float(quality_stats["q1"]), 4), "median": round(float(quality_stats["median"]), 4), "q3": round(float(quality_stats["q3"]), 4), "min": round(min_value, 4), "max": round(max_value, 4)}
        return JsonResponse({"code": 200, "message": "Data quality distribution analysis completed", "data": {"distribution": distribution_data, "statistics": statistics_summary, "total_samples": total_count}})

class ProductionVarietyCharacteristicAnalysis(View):
    def post(self, request):
        params = json.loads(request.body)
        analysis_dimensions = params.get('dimensions', ['area', 'perimeter', 'roundness', 'compactness'])
        df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/drybean_db").option("driver", "com.mysql.cj.jdbc.Driver").option("dbtable", "tb_drybean_data").option("user", "root").option("password", "123456").load()
        df_grouped = df.groupBy("bean_type")
        # Build one aggregation expression list covering every requested dimension.
        agg_exprs = []
        for dim in analysis_dimensions:
            agg_exprs.append(avg(dim).alias(f"{dim}_avg"))
            agg_exprs.append(stddev(dim).alias(f"{dim}_std"))
            agg_exprs.append(spark_min(dim).alias(f"{dim}_min"))
            agg_exprs.append(spark_max(dim).alias(f"{dim}_max"))
            agg_exprs.append(count(dim).alias(f"{dim}_count"))
        agg_exprs.append(count("*").alias("total_count"))
        variety_stats = df_grouped.agg(*agg_exprs)
        variety_list = variety_stats.collect()
        variety_data = []
        for row in variety_list:
            variety_info = {"bean_type": row["bean_type"], "total_count": row["total_count"], "characteristics": {}}
            for dim in analysis_dimensions:
                variety_info["characteristics"][dim] = {"average": round(float(row[f"{dim}_avg"]) if row[f"{dim}_avg"] else 0, 4), "std_dev": round(float(row[f"{dim}_std"]) if row[f"{dim}_std"] else 0, 4), "min_value": round(float(row[f"{dim}_min"]) if row[f"{dim}_min"] else 0, 4), "max_value": round(float(row[f"{dim}_max"]) if row[f"{dim}_max"] else 0, 4), "valid_count": row[f"{dim}_count"]}
            variety_data.append(variety_info)
        # Rank the varieties on each dimension and record the spread between best and worst.
        variety_comparison = {}
        for dim in analysis_dimensions:
            dim_values = [(v["bean_type"], v["characteristics"][dim]["average"]) for v in variety_data]
            dim_values_sorted = sorted(dim_values, key=lambda x: x[1], reverse=True)
            variety_comparison[dim] = {"ranking": [{"bean_type": item[0], "value": item[1]} for item in dim_values_sorted], "max_variety": dim_values_sorted[0][0] if dim_values_sorted else None, "min_variety": dim_values_sorted[-1][0] if dim_values_sorted else None, "difference": round(dim_values_sorted[0][1] - dim_values_sorted[-1][1], 4) if len(dim_values_sorted) > 1 else 0}
        # Pairwise Pearson correlation between the requested dimensions.
        correlation_analysis = {}
        for i in range(len(analysis_dimensions)):
            for j in range(i + 1, len(analysis_dimensions)):
                dim1, dim2 = analysis_dimensions[i], analysis_dimensions[j]
                corr_df = df.select(dim1, dim2).na.drop()
                corr_value = corr_df.stat.corr(dim1, dim2)
                correlation_analysis[f"{dim1}_vs_{dim2}"] = round(corr_value, 4) if corr_value is not None else 0
        return JsonResponse({"code": 200, "message": "Production variety characteristic analysis completed", "data": {"variety_statistics": variety_data, "variety_comparison": variety_comparison, "correlation_analysis": correlation_analysis}})
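The three classes above are plain Django class-based views, so they still have to be exposed through URL routing before the Vue + Echarts frontend can call them. The snippet below is a minimal routing sketch under assumed names: it presumes the views live in the app's views.py, and the URL paths and the use of csrf_exempt are illustrative assumptions, not taken from the project source.

# urls.py -- illustrative routing for the three analysis views (paths are assumptions)
from django.urls import path
from django.views.decorators.csrf import csrf_exempt

from .views import (MultiDimensionalRankingAnalysis, DataQualityDistributionAnalysis,
                    ProductionVarietyCharacteristicAnalysis)

urlpatterns = [
    # csrf_exempt keeps the sketch self-contained for pure-JSON POST clients;
    # a real deployment would normally send the CSRF token with each request instead.
    path("api/analysis/ranking/", csrf_exempt(MultiDimensionalRankingAnalysis.as_view())),
    path("api/analysis/quality-distribution/", csrf_exempt(DataQualityDistributionAnalysis.as_view())),
    path("api/analysis/variety-characteristics/", csrf_exempt(ProductionVarietyCharacteristicAnalysis.as_view())),
]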
Documentation Preview of the Dry Bean Data Visualization and Analysis System Based on Big Data