import openpyxl import pandas as pd import matplotlib.pyplot as plt wb=openpyxl.load_workbook('data.xlsx') worksheet = wb.active max_row = worksheet.max_row def get_max_column_with_data(worksheet):#获取excel中最大列数 max_column = 0 for col in worksheet.columns: if any(cell.value for cell in col): max_column = col[0].column return max_column max_column = get_max_column_with_data(worksheet)#获取excel中最大行数 dates=[] scores=worksheet['D'] for score in scores: dates.append(score.value) dates.remove(dates[0]) dotes=[] for datrs in dates: dotes.append(float(datrs)) rows_to_delete = [] for row in worksheet.iter_rows(min_row=1, max_row=251, min_col=1, max_col=9): for cell in row: if cell.value == '空': rows_to_delete.append(cell.row) break for row_number in reversed(rows_to_delete): worksheet.delete_rows(row_number) wb.save('clean.xlsx') df1=pd.read_excel("clean.xlsx",sheet_name='Sheet',usecols=[0,2,3,4]) print(' 数据的最大值最小值平均数方差记录数等') print(df1.describe()) print('经过爬行策略知,当‘导演’为则填入‘空’,删除excel中有‘空’的横行') print('保存为clean.xlsx') print('数据的空值显示') print(df1.isnull().sum()) class MyClass: def __init__(self): self.value = 0 def turtle1(self): #柱形图1 print('此图的意义是阐明前十部电影的评分情况以及前十名每个电影的评分') plt.rcParams['font.sans-serif'] = 'SimHei' df = pd.read_excel('clean.xlsx', sheet_name='Sheet') mq=df.head(11) x = mq['中文标题'] y = mq['电影评分'] plt.figure(figsize=(10,3), dpi=150) plt.title('202209427李志豪 top250前十电影评分') plt.xlabel('中文标题') plt.ylabel('电影评分') plt.bar(x, y, width=0.6) for a, b in zip(x, y): plt.text(x=a, y=b, s=b, color='black',fontsize=8, ha='center') plt.xticks(range(len(x))[::2]) plt.legend(['202209427李志豪 top250评分']) plt.show() def turtle2(self): #柱形图2 print('此图的意义是阐明四名导演各自作品的数量') plt.rcParams['font.sans-serif'] = 'SimHei' x = ['陈凯歌','宫崎骏','克里斯托夫','加布里尔'] y = [1,8,1,1] plt.figure(figsize=(10, 3), dpi=150) plt.title('202209427李志豪 导演和作品数量') plt.xlabel('导演') plt.ylabel('数量') plt.bar(x, y, width=0.6) for a, b in zip(x, y): plt.text(x=a, y=b, s=b, color='black', fontsize=6, ha='center') plt.xticks(range(len(x))) plt.legend(['导演—数量']) plt.show() def turtle3(self): # #趋势图 print('此图的意义是通过图形的模式阐明前十名的评分') plt.rcParams['font.sans-serif'] = 'SimHei' df = pd.read_excel('clean.xlsx', sheet_name='Sheet') mq=df.head(10) x = mq['中文标题'] y = mq['电影评分'] plt.figure(figsize=(5, 3), dpi=150) plt.title('202209427李志豪 前十的电影评分') plt.stackplot(x, y) plt.xticks([]) cellText = mq[['中文标题']].T.values.tolist() rowLabels = ['中文标题'] colLabels = mq['电影评分'].values.tolist() plt.table(cellText=cellText, rowLabels=rowLabels, colLabels=colLabels) plt.show() def turtle4(self): # 饼图 print('此图的意义是阐明前十个电影的各自的得分占前十个电影的评分情况') plt.rcParams['font.sans-serif'] = 'SimHei' df = pd.read_excel('clean.xlsx', sheet_name='Sheet') plt.figure(figsize=(5, 3), dpi=150) mq = df.head(10) plt.pie(x=mq['电影评分'], autopct='%.1f%%', labels=mq['中文标题']) plt.title('202209427李志豪 前十名对应评分占比情况') plt.show() def turtle5(self): print('此图的含义是通过散点阐明前十个电影的评分差距') plt.rcParams['font.sans-serif'] = 'SimHei' df = pd.read_excel('clean.xlsx', sheet_name='Sheet') mq=df.head(8) x = mq['中文标题'] y = mq['电影评分'] plt.figure(figsize=(5, 3), dpi=150) plt.xlabel('202209427李志豪 中文标题') plt.ylabel('电影评分') plt.xticks(fontsize=6) plt.scatter(x, y) for a, b in zip(x, y): plt.text(x=a, y=b, s=b, color='black', fontsize=8, ha='center') plt.show() if __name__ == '__main__': my_instance = MyClass() my_instance.turtle1() my_instance.turtle2() my_instance.turtle3() my_instance.turtle4() my_instance.turtle5()
@刘二爱和胡辣汤