💖💖Author: 计算机毕业设计杰瑞
💙💙About me: I spent many years teaching computer science training courses and genuinely enjoy teaching. My main languages and platforms are Java, WeChat Mini Programs, Python, Golang, and Android, and my project experience covers big data, deep learning, websites, mini programs, Android apps, and algorithms. I regularly take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for reducing duplication rates. I enjoy sharing solutions to problems I run into during development and exchanging ideas about technology, so feel free to ask me any questions about code or technology!
💛💛A few words: Thank you all for your attention and support!
💜💜
Website practical projects
Android / Mini Program practical projects
Big data practical projects
Deep learning practical projects
Recommended topics for computer science graduation projects
Introduction to the Big-Data-Based Douban Movie Data Visualization and Analysis System
The big-data-based Douban movie data visualization and analysis system is an integrated platform covering data collection, storage, analysis, and presentation. It leverages the Hadoop distributed storage architecture and the Spark big-data processing engine to perform deep mining and analysis of large volumes of Douban movie data. The backend is built on the Django framework, with the data-processing logic implemented in Python; the frontend uses Vue together with the ElementUI component library to build a user-friendly interface and integrates the ECharts charting library for rich data visualization. The system comprises core modules for the home page, my profile, user management, Douban movie data management, large-screen visualization, and data visualization analysis, and it analyzes movie data across multiple dimensions such as rating distribution, genre preferences, trends by year, and regional characteristics. HDFS provides reliable storage for the large dataset, Spark SQL handles efficient querying and statistical analysis, and data-science packages such as Pandas and NumPy are used for numerical computation and data processing. The analysis results are presented to users as intuitive charts, providing data support for film industry research, user behavior analysis, and market trend prediction.
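To illustrate the HDFS → Spark SQL → Pandas → ECharts data flow described above, here is a minimal sketch. The helper function name genre_rating_summary is hypothetical and not part of the project code shown later; it assumes the Parquet dataset at hdfs://localhost:9000/movie_data/douban_movies.parquet with movie_type and rating columns, which is the path and schema used in the code section below.

from pyspark.sql import SparkSession


def genre_rating_summary():
    # Hypothetical helper: sketch of the system's data flow, not project code
    spark = SparkSession.builder.appName("GenreRatingSummary").getOrCreate()
    # Read the movie dataset from HDFS and register it for Spark SQL
    df = spark.read.parquet("hdfs://localhost:9000/movie_data/douban_movies.parquet")
    df.createOrReplaceTempView("movies")
    # Aggregate movie count and average rating per genre with Spark SQL
    summary = spark.sql("""
        SELECT movie_type, COUNT(*) AS movie_count, AVG(rating) AS avg_rating
        FROM movies
        WHERE movie_type IS NOT NULL AND rating > 0
        GROUP BY movie_type
        ORDER BY movie_count DESC
        LIMIT 10
    """)
    # The aggregated result is small, so convert it to Pandas for final shaping
    pdf = summary.toPandas()
    pdf["avg_rating"] = pdf["avg_rating"].round(2)
    # Shape the data the way an ECharts bar/line chart option expects
    option = {
        "xAxis": {"type": "category", "data": pdf["movie_type"].tolist()},
        "yAxis": [{"type": "value", "name": "count"},
                  {"type": "value", "name": "avg rating"}],
        "series": [
            {"type": "bar", "data": pdf["movie_count"].tolist()},
            {"type": "line", "yAxisIndex": 1, "data": pdf["avg_rating"].tolist()},
        ],
    }
    spark.stop()
    return option

The returned dictionary can be serialized as JSON by a Django view and passed directly to an ECharts instance on the Vue side.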
Demo Video of the Big-Data-Based Douban Movie Data Visualization and Analysis System
[Data Analysis] Big-Data-Based Douban Movie Data Visualization and Analysis System | Visualization Dashboard · Big Data Graduation Project · Topic Recommendation · Documentation Guidance + PPT + Deployment · Hadoop Spark
Demo Screenshots of the Big-Data-Based Douban Movie Data Visualization and Analysis System
Code Walkthrough of the Big-Data-Based Douban Movie Data Visualization and Analysis System
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, desc, count, avg, when, regexp_extract
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json


# Core view for Douban movie data management: filtered search over the movie table
@csrf_exempt
def movie_data_management(request):
    spark = SparkSession.builder.appName("MovieDataManagement").config("spark.sql.adaptive.enabled", "true").getOrCreate()
    hdfs_path = "hdfs://localhost:9000/movie_data/douban_movies.parquet"
    df = spark.read.parquet(hdfs_path)
    df.createOrReplaceTempView("movies")
    if request.method != 'POST':
        # Only POST with JSON filters is supported; return an explicit error otherwise
        spark.stop()
        return JsonResponse({'status': 'error', 'message': 'POST required'}, status=405)
    data = json.loads(request.body)
    movie_name = data.get('movie_name', '')
    movie_type = data.get('movie_type', '')
    rating_min = data.get('rating_min', 0)
    rating_max = data.get('rating_max', 10)
    # Filter values are interpolated directly into the SQL string,
    # so they must come from trusted input or be sanitized before use
    query = f"""
        SELECT movie_id, movie_name, director, actors, movie_type, rating, year, country, language
        FROM movies
        WHERE movie_name LIKE '%{movie_name}%'
          AND movie_type LIKE '%{movie_type}%'
          AND rating >= {rating_min} AND rating <= {rating_max}
        ORDER BY rating DESC
        LIMIT 100
    """
    result_df = spark.sql(query)
    movies_list = result_df.collect()
    processed_data = []
    for row in movies_list:
        movie_dict = {
            'movie_id': row['movie_id'],
            'movie_name': row['movie_name'],
            'director': row['director'],
            'actors': row['actors'],
            'movie_type': row['movie_type'],
            'rating': float(row['rating']) if row['rating'] else 0.0,
            'year': row['year'],
            'country': row['country'],
            'language': row['language']
        }
        processed_data.append(movie_dict)
    spark.stop()
    return JsonResponse({'status': 'success', 'data': processed_data, 'total': len(processed_data)})
# Data preparation for the large-screen visualization dashboard
def dashboard_visualization(request):
    spark = SparkSession.builder.appName("DashboardVisualization").config("spark.sql.adaptive.enabled", "true").getOrCreate()
    hdfs_path = "hdfs://localhost:9000/movie_data/douban_movies.parquet"
    df = spark.read.parquet(hdfs_path)
    df.createOrReplaceTempView("movies")
    # Headline counters: total number of movies and overall average rating
    total_movies_query = "SELECT COUNT(*) as total FROM movies"
    total_movies = spark.sql(total_movies_query).collect()[0]['total']
    avg_rating_query = "SELECT AVG(rating) as avg_rating FROM movies WHERE rating > 0"
    avg_rating = spark.sql(avg_rating_query).collect()[0]['avg_rating']
    # Top 10 genres by number of movies
    type_distribution_query = """
        SELECT movie_type, COUNT(*) as count
        FROM movies
        WHERE movie_type IS NOT NULL
        GROUP BY movie_type
        ORDER BY count DESC
        LIMIT 10
    """
    type_data = spark.sql(type_distribution_query).collect()
    # Yearly trend of movie count and average rating from 2000 to 2023
    year_trend_query = """
        SELECT year, COUNT(*) as movie_count, AVG(rating) as avg_rating
        FROM movies
        WHERE year >= 2000 AND year <= 2023 AND rating > 0
        GROUP BY year
        ORDER BY year
    """
    year_data = spark.sql(year_trend_query).collect()
    # Bucket ratings into ranges for the rating-distribution chart
    rating_distribution_query = """
        SELECT
            CASE
                WHEN rating >= 9.0 THEN '9.0-10.0'
                WHEN rating >= 8.0 THEN '8.0-8.9'
                WHEN rating >= 7.0 THEN '7.0-7.9'
                WHEN rating >= 6.0 THEN '6.0-6.9'
                ELSE '6.0以下'
            END as rating_range,
            COUNT(*) as count
        FROM movies
        WHERE rating > 0
        GROUP BY rating_range
        ORDER BY rating_range DESC
    """
    rating_dist_data = spark.sql(rating_distribution_query).collect()
    dashboard_data = {
        'total_movies': total_movies,
        'avg_rating': round(float(avg_rating), 2) if avg_rating else 0,
        'type_distribution': [{'type': row['movie_type'], 'count': row['count']} for row in type_data],
        'year_trend': [{'year': row['year'], 'count': row['movie_count'], 'rating': round(float(row['avg_rating']), 2)} for row in year_data],
        'rating_distribution': [{'range': row['rating_range'], 'count': row['count']} for row in rating_dist_data]
    }
    spark.stop()
    return JsonResponse({'status': 'success', 'data': dashboard_data})
# Multi-dimensional analysis: by director, by country, and year-genre correlation
def data_visualization_analysis(request):
    spark = SparkSession.builder.appName("DataVisualizationAnalysis").config("spark.sql.adaptive.enabled", "true").getOrCreate()
    hdfs_path = "hdfs://localhost:9000/movie_data/douban_movies.parquet"
    df = spark.read.parquet(hdfs_path)
    df.createOrReplaceTempView("movies")
    analysis_type = request.GET.get('type', 'director_analysis')
    if analysis_type == 'director_analysis':
        # Directors with at least 3 rated movies, ranked by average rating
        director_query = """
            SELECT director, COUNT(*) as movie_count, AVG(rating) as avg_rating
            FROM movies
            WHERE director IS NOT NULL AND rating > 0
            GROUP BY director
            HAVING COUNT(*) >= 3
            ORDER BY avg_rating DESC, movie_count DESC
            LIMIT 20
        """
        director_data = spark.sql(director_query).collect()
        result_data = [{'director': row['director'], 'movie_count': row['movie_count'], 'avg_rating': round(float(row['avg_rating']), 2)} for row in director_data]
    elif analysis_type == 'country_analysis':
        # Top 15 production countries/regions by number of movies
        country_query = """
            SELECT country, COUNT(*) as movie_count, AVG(rating) as avg_rating
            FROM movies
            WHERE country IS NOT NULL AND rating > 0
            GROUP BY country
            ORDER BY movie_count DESC
            LIMIT 15
        """
        country_data = spark.sql(country_query).collect()
        result_data = [{'country': row['country'], 'movie_count': row['movie_count'], 'avg_rating': round(float(row['avg_rating']), 2)} for row in country_data]
    elif analysis_type == 'correlation_analysis':
        # Year-by-genre breakdown (2010-2023) for genres with at least 5 movies per year
        correlation_query = """
            SELECT year, movie_type, COUNT(*) as count, AVG(rating) as avg_rating
            FROM movies
            WHERE year >= 2010 AND year <= 2023 AND movie_type IS NOT NULL AND rating > 0
            GROUP BY year, movie_type
            HAVING COUNT(*) >= 5
            ORDER BY year, avg_rating DESC
        """
        correlation_data = spark.sql(correlation_query).collect()
        processed_correlation = {}
        for row in correlation_data:
            year = row['year']
            if year not in processed_correlation:
                processed_correlation[year] = []
            processed_correlation[year].append({
                'type': row['movie_type'],
                'count': row['count'],
                'rating': round(float(row['avg_rating']), 2)
            })
        result_data = processed_correlation
    else:
        # Unknown analysis type: return an empty result instead of raising a NameError
        result_data = []
    spark.stop()
    return JsonResponse({'status': 'success', 'analysis_type': analysis_type, 'data': result_data})
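For the Vue frontend to reach these views, they have to be exposed through Django's URL configuration. The sketch below is a hypothetical urls.py wiring, assuming the three view functions above live in this app's views module; the URL paths themselves are illustrative and not taken from the project.

from django.urls import path
from . import views  # assumes the view functions above are defined in this app's views module

urlpatterns = [
    # POST with JSON filters: movie_name, movie_type, rating_min, rating_max
    path('api/movies/manage/', views.movie_data_management),
    # GET: aggregated counters and chart data for the large-screen dashboard
    path('api/dashboard/', views.dashboard_visualization),
    # GET with ?type=director_analysis | country_analysis | correlation_analysis
    path('api/analysis/', views.data_visualization_analysis),
]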
Documentation Preview of the Big-Data-Based Douban Movie Data Visualization and Analysis System