0427课 初识数据分析
Python数据分析3大神器
例子
用随机方式生成5个学生的3门课程的成绩,求学生的平均分以及每门课程的最高分和最低分
import random
# Column labels (courses) and row labels (students) of the score table.
courses = ['语文', '数学', '英语']
students = ['s1', 's2', 's3', 's4', 's5']
n, m = len(students), len(courses)
# One row per student, one random integer score per course.
scores = [[random.randint(60, 100) for _ in range(m)] for _ in range(n)]
print(scores)
# Per-student average: the mean of that student's row, shown to one decimal.
for i, score in enumerate(scores):
    avg_score = sum(score) / m
    print(f'{students[i]}的平均分: {avg_score:.1f}')
'''
s1的平均分: 77.0
s2的平均分: 76.3
s3的平均分: 74.3
s4的平均分: 87.3
s5的平均分: 84.0
'''
# Per-course extremes: gather column i of the score table, then take max/min.
for i, course in enumerate(courses):
    temp = [row[i] for row in scores]
    max_score, min_score = max(temp), min(temp)
    print(f'{course}的最高分: {max_score}')
    print(f'{course}的最低分: {min_score}')
'''
语文的最高分: 95
语文的最低分: 60
数学的最高分: 97
数学的最低分: 67
英语的最高分: 94
英语的最低分: 60
'''
处理成numpy的二维数组
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
# Labels for the table: one row per student, one column per course.
students = ['s1', 's2', 's3', 's4', 's5']
courses = ['语文', '数学', '英语']
n = len(students)
m = len(courses)
# Build the raw scores as nested lists first and show them ...
scores = [
    [random.randint(60, 100) for _course in range(m)]
    for _student in range(n)
]
print(scores)
# ... then rebind the same name to the 2-D NumPy array built from them.
scores = np.array(scores)
print(scores)
'''
[[79 92 60]
 [60 97 72]
 [66 86 71]
 [93 75 94]
 [95 67 90]]
'''
# Row-wise mean (axis=1), rounded to one decimal place.
scores.mean(axis=1).round(1)
# Column-wise maximum (axis=0).
np.max(scores, axis=0)
# Column-wise minimum (axis=0).
np.min(scores, axis=0)
pandas 二维数组,变成二维表格
# Wrap the score array in a labelled table: students as rows, courses as columns.
score_df = pd.DataFrame(scores, index=students, columns=courses)
print(score_df)
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/f4ff626ff7fe82fc8e70756d8576a818.png)
# Mean across each row of the table (axis=1), rounded to one decimal place.
avg_score = score_df.mean(axis=1).round(1)
print(avg_score)
'''
s1 77.0
s2 76.3
s3 74.3
s4 87.3
s5 84.0
'''
# Store the rounded row means held in avg_score as a new '平均分' column,
# then show the updated table.
score_df['平均分'] = avg_score
print(score_df)
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/028c4e82a1c67b3b7d009b40db4523f7.png)
# Compute both column-wise extremes before appending either summary row,
# so the new rows do not feed into each other's result.
max_score, min_score = score_df.max(axis=0), score_df.min(axis=0)
for label, values in (('最高分', max_score), ('最低分', min_score)):
    score_df.loc[label] = values
print(score_df)
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/e1565195a33227e800b62f2a1870e490.png)
将生成的数据表保存到Excel表格中
# Write the table to an Excel workbook at the path below.
excel_path = '学生成绩统计表.xlsx'
score_df.to_excel(excel_path)
matplotlib 将表格变为柱状图
# Pick the sans-serif font used for the chart text
# (presumably chosen so the Chinese labels render; confirm the font is installed).
plt.rcParams['font.sans-serif'] = ['FangSong']
try:
    # Script-safe form of the notebook magic
    #   %config InlineBackend.figure_format = 'svg'
    # which is a SyntaxError when this code runs outside IPython/Jupyter.
    get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'svg'")
except NameError:
    # get_ipython is not defined in a plain Python interpreter; skip the setting.
    pass
# Plot the table directly as a bar chart. No row is labelled '', so looking up
# score_df.loc[''] at this point would raise KeyError.
score_df.plot(kind='bar')
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/f8e0f032ed19ddd41073ad5b4f2dbe24.png)