一、系统架构设计
技术选型矩阵
| 功能模块 | 技术方案 | 依赖库 |
|---|---|---|
| 数据读取 | Excel/CSV解析 | pandas |
| 基础可视化 | 静态图表生成 | matplotlib |
| 地理可视化 | 交互式地图 | pyecharts |
| 文本分析 | 中文分词与词云 | jieba+wordcloud |
| 关系网络 | 图结构可视化 | networkx |
| 人脸识别 | 云API调用 | tencentcloud-sdk-python |
| 界面交互 | 可视化看板 | streamlit(可选) |
二、开发环境搭建
2.1 环境配置
# Create a virtual environment
python -m venv student_analysis
# Activate it: run only the line that matches your platform
source student_analysis/bin/activate # Linux/Mac
student_analysis\Scripts\activate # Windows
# Install core dependencies (openpyxl is the engine pandas needs to read .xlsx files)
pip install pandas openpyxl matplotlib pyecharts jieba wordcloud networkx pillow python-dotenv
# Tencent Cloud SDK
pip install tencentcloud-sdk-python
2.2 配置文件(.env)
TENCENT_SECRET_ID=your_secret_id
TENCENT_SECRET_KEY=your_secret_key
DATA_PATH=./students.xlsx
三、核心功能实现
3.1 数据加载模块
import pandas as pd
from dotenv import load_dotenv
load_dotenv()
def load_data(path=None):
    """Load the student roster and keep only the rows usable for analysis.

    Args:
        path: Optional path to the data file. When omitted, the ``DATA_PATH``
            environment variable is used (the original behaviour). Files
            ending in ``.csv`` are parsed with ``pandas.read_csv``; anything
            else is handed to ``pandas.read_excel``.

    Returns:
        A DataFrame in which ``成绩`` has been coerced to numeric (unparseable
        values become NaN), the characters ``省``/``市`` have been removed from
        ``省份``, and rows missing ``性别``, ``省份`` or ``成绩`` are dropped.

    Raises:
        ValueError: If no path is given and ``DATA_PATH`` is not set.
    """
    import os  # local import: this snippet uses os but never imported it

    if path is None:
        path = os.getenv('DATA_PATH')
    if not path:
        raise ValueError("No data file given: pass `path` or set DATA_PATH")

    if str(path).lower().endswith('.csv'):
        df = pd.read_csv(path)
    else:
        df = pd.read_excel(path)

    # Data preprocessing
    df['成绩'] = pd.to_numeric(df['成绩'], errors='coerce')
    # regex=True is required: since pandas 2.0 the pattern is treated as a
    # literal by default, so '省|市' would never match and nothing was stripped.
    df['省份'] = df['省份'].str.replace('省|市', '', regex=True)
    return df.dropna(subset=['性别', '省份', '成绩'])
3.2 可视化模块
性别分布饼图
def plot_gender(df):
    """Save a pie chart of the ``性别`` value counts to ``gender_pie.png``.

    Args:
        df: DataFrame containing a ``性别`` column.
    """
    gender_count = df['性别'].value_counts()
    fig = plt.figure(figsize=(8, 6))
    try:
        plt.pie(
            gender_count,
            labels=gender_count.index,
            autopct='%1.1f%%',
            colors=['#ff9999', '#66b3ff'],
        )
        plt.title("Gender Distribution")
        plt.savefig('gender_pie.png')
    finally:
        # Close the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
中国省份分布地图
from pyecharts.charts import Map
def plot_province_map(df):
    """Render the per-province student counts to ``province_map.html``.

    Args:
        df: DataFrame containing a ``省份`` column.
    """
    # `opts` was referenced below but never imported anywhere in the file,
    # which raised NameError; import it locally so the block is self-contained.
    from pyecharts import options as opts

    province_counts = df['省份'].value_counts()
    # Cast counts to plain int so the values are always JSON-serialisable.
    data_pairs = [[name, int(count)] for name, count in province_counts.items()]

    map_chart = Map()
    map_chart.add("学生分布", data_pairs, "china")
    map_chart.set_global_opts(title_opts=opts.TitleOpts(title="生源地分布"))
    map_chart.render("province_map.html")
城市分布柱状图
def plot_city_bar(df):
    """Save a bar chart of the 20 most frequent ``城市`` values to ``city_bar.png``.

    Args:
        df: DataFrame containing a ``城市`` column.
    """
    fig = plt.figure(figsize=(12, 6))
    try:
        # value_counts() sorts by frequency, so the head is the top 20.
        city_top20 = df['城市'].value_counts().head(20)
        city_top20.plot(kind='bar', color='#2ca02c')
        plt.title("Top 20 Cities")
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig('city_bar.png')
    finally:
        # Close the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
签名词云
from wordcloud import WordCloud
import jieba
def generate_wordcloud(df, font_path='msyh.ttc'):
    """Build a word cloud from the ``签名`` column and save it to ``wordcloud.png``.

    Args:
        df: DataFrame containing a ``签名`` column.
        font_path: Font file used to draw the words. Defaults to the
            previously hard-coded ``msyh.ttc``; pass another CJK-capable font
            on systems where that file is unavailable.

    Returns:
        None. When there is no signature text at all, a warning is emitted
        and no file is written.
    """
    import warnings

    # astype(str) guards against non-string cells (e.g. numbers), which
    # would make ' '.join raise TypeError.
    signatures = df['签名'].dropna().astype(str)
    text = ' '.join(signatures)
    if not text.strip():
        # WordCloud.generate raises on empty input; skip explicitly instead.
        warnings.warn("no signature text available; wordcloud.png was not written")
        return

    word_str = ' '.join(jieba.cut(text))
    wc = WordCloud(font_path=font_path,
                   background_color='white',
                   max_words=200)
    wc.generate(word_str)
    wc.to_file('wordcloud.png')
成绩分布折线图
def plot_score_dist(df):
    """Save a line chart of score counts per 5-point interval to ``score_line.png``.

    Scores are bucketed into ``[0, 5], (5, 10], ..., (95, 100]``. The input
    DataFrame is not modified.

    Args:
        df: DataFrame containing a numeric ``成绩`` column.
    """
    edges = list(range(0, 101, 5))
    # Explicit labels keep the tick text stable; the first bucket is closed
    # on the left because include_lowest=True puts a score of 0 into it
    # (previously 0 fell outside every bin and was silently dropped).
    labels = [
        f"{'[' if lo == 0 else '('}{lo}, {hi}]"
        for lo, hi in zip(edges[:-1], edges[1:])
    ]
    intervals = pd.cut(df['成绩'], edges, labels=labels, include_lowest=True)
    # observed=False keeps empty buckets on the x-axis regardless of the
    # pandas version's default; grouping by a separate Series avoids adding
    # a column to the caller's DataFrame.
    score_dist = df['成绩'].groupby(intervals, observed=False).size()

    fig = plt.figure(figsize=(10, 6))
    try:
        plt.plot(score_dist.index.astype(str),
                 score_dist.values,
                 marker='o',
                 color='#d62728')
        plt.title("Score Distribution")
        plt.xticks(rotation=45)
        # Keep the rotated tick labels inside the saved image.
        plt.tight_layout()
        plt.savefig('score_line.png')
    finally:
        # Close the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
3.3 高级功能模块
宿舍关系网络图
import networkx as nx
def plot_dorm_network(df):
    """Draw the student-to-dorm graph and save it to ``dorm_network.png``.

    Each dorm and each student becomes a node, with one edge from every
    student to their dorm.

    Args:
        df: DataFrame containing ``学号`` and ``宿舍号`` columns.
    """
    # Rows missing either value cannot form an edge and would add NaN nodes.
    pairs = df[['学号', '宿舍号']].dropna()

    G = nx.Graph()
    labels = {}
    # Node keys are (kind, value) tuples so that a student ID equal to a dorm
    # number does not collapse into one node; `labels` keeps the drawn text
    # as the plain value.
    for student_id, dorm_id in zip(pairs['学号'], pairs['宿舍号']):
        dorm_node = ('dorm', dorm_id)
        student_node = ('student', student_id)
        G.add_node(dorm_node, node_type='dorm')
        G.add_node(student_node, node_type='student')
        G.add_edge(student_node, dorm_node)
        labels[dorm_node] = dorm_id
        labels[student_node] = student_id

    fig = plt.figure(figsize=(15, 10))
    try:
        pos = nx.spring_layout(G)
        nx.draw(G, pos, labels=labels, with_labels=True, node_size=500)
        plt.savefig('dorm_network.png')
    finally:
        # Close the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
腾讯云人脸识别
from tencentcloud.common import credential
from tencentcloud.iai.v20200303 import iai_client, models
def detect_faces(image_path, region="ap-guangzhou"):
    """Call the Tencent Cloud DetectFace API on a local image file.

    Credentials are read from the ``TENCENT_SECRET_ID`` and
    ``TENCENT_SECRET_KEY`` environment variables.

    Args:
        image_path: Path to the image file to analyse.
        region: Tencent Cloud region for the client. Defaults to the
            previously hard-coded ``"ap-guangzhou"``.

    Returns:
        The response object returned by ``client.DetectFace``.
    """
    import base64
    import os

    cred = credential.Credential(
        os.getenv('TENCENT_SECRET_ID'),
        os.getenv('TENCENT_SECRET_KEY'))
    client = iai_client.IaiClient(cred, region)

    with open(image_path, "rb") as f:
        # The API's Image field takes Base64 text; raw bytes cannot be
        # serialised into the JSON request body.
        image_b64 = base64.b64encode(f.read()).decode("ascii")

    req = models.DetectFaceRequest()
    req.Image = image_b64
    req.NeedFaceAttributes = 1
    return client.DetectFace(req)
四、系统集成与运行
4.1 主程序
import os
import matplotlib.pyplot as plt
if __name__ == "__main__":
    df = load_data()

    # Render every chart in the original order: the basic static charts
    # first, then the interactive map and the dorm graph.
    for render in (
        plot_gender,
        plot_city_bar,
        generate_wordcloud,
        plot_score_dist,
        plot_province_map,
        plot_dorm_network,
    ):
        render(df)

    # Face-detection example
    result = detect_faces("sample_face.jpg")
    first_face = result.FaceInfos[0]
    print("人脸特征:", first_face.FaceAttributesInfo)
4.2 运行指令
python main.py
4.3 输出结果
生成文件列表:
- gender_pie.png # 性别饼图
- city_bar.png # 城市柱状图
- wordcloud.png # 签名词云
- score_line.png # 成绩分布
- province_map.html # 交互式地图
- dorm_network.png # 宿舍关系图
五、设计思想与优化建议
5.1 核心设计原则
- 模块化设计:每个可视化功能独立实现
- 数据驱动:采用pandas统一处理数据源
- 渐进增强:基础功能先行,高级功能可选
- 云原生:敏感操作通过API实现
5.2 性能优化方向
# Memory-optimisation example: process a large CSV in fixed-size chunks
chunk_size = 1000
# The chunked reader is a context manager (pandas >= 1.2), so the underlying
# file handle is released even if process_chunk raises part-way through.
with pd.read_csv('big_data.csv', chunksize=chunk_size) as reader:
    for chunk in reader:
        process_chunk(chunk)
5.3 安全建议
# Sensitive-data handling: symmetric encryption round trip with Fernet
from cryptography.fernet import Fernet
# A fresh key is generated on every run and held only in this variable.
# NOTE(review): presumably real use must store the key somewhere safe, since
# data encrypted here cannot be decrypted without it — confirm the intended
# key-management approach.
key = Fernet.generate_key()
cipher = Fernet(key)
# encrypt() takes bytes and returns an encrypted token
encrypted_data = cipher.encrypt(b"Sensitive Info")
# decrypt() with the same cipher reverses the step above
decrypted_data = cipher.decrypt(encrypted_data)