保留原本功能优化以下代码import pandas as pd import numpy as np import matplotlib.pyplot as plt # 1.读取并查看数据 bike_day = pd.read_csv("C:/Users/15020/Desktop/26.bike_day.csv") print(bike_day.head(5)) # 前5行 print(bike_day.tail(2)) #后2行 #2.处理数据并导出到文件 bike_day_user = bike_day[['instant','dteday','yr', 'casual', 'registered']].dropna() bike_day_user.to_csv('bike_day_user.txt', sep=' ',index=False, header=False) #3.读取数据并添加新列并导出到新文件 bike_day_user = pd.read_csv('bike_day_user.txt', sep=' ', header=None, names=['instant','dteday','yr', 'casual',"registered"]) bike_day_user['cnt'] = bike_day_user['casual'] + bike_day_user['registered'] bike_day_user.to_excel('bike_day_user_cnt.xlsx', index=False) #4.读取数据并进行统计 bike_day_user_cnt = pd.read_excel('bike_day_user_cnt.xlsx') print('cnt最大值:',bike_day_user_cnt['cnt'].max()) print('ent最小值:',bike_day_user_cnt['cnt'].min()) print('2011号cnt年平均值:',bike_day_user_cnt[bike_day_user_cnt['yr'] == 0]['cnt'].mean()) print('2012年cnt年平均值:',bike_day_user_cnt[bike_day_user_cnt['yr'] == 1]['cnt'].mean()) print('2011年月严始值:', bike_day_user_cnt[bike_day_user_cnt['yr'] == 0].groupby('mnth')['cnt'].mean()) print('2022年月平均值:', bike_day_user_cnt[bike_day_user_cnt['yr'] == 1].groupby('mnth')['cnt'].mean()) # 5.可视化并保存图像 fig, ax = plt.subplots() ax.barh(bike_day_user_cnt['mnth'], bike_day_user_cnt[bike_day_user_cnt['yr'] == 0].groupby('mnth')['cnt'].mean(), color='blue', label='2011') ax.barh(bike_day_user_cnt['mnth'], bike_day_user_cnt[bike_day_user_cnt['yr'] == 1].groupby('mnth')['cnt'].mean(), color='lightblue', label='2012') ax.set_yticks(np.arange(1,13)) ax.set_yticklabels(['Jan','Feb','Mar', 'Apr', 'May','Jun','Jul','Aug', 'sep', 'Oct','Nov','Dec']) ax.set_xlabel('Average number of shared bike users') ax.set_title('Monthly Average Number of Shared Bike Users in 2011-2012') ax.legend() fig.savefig('bike_day_user_cnt.png', dpi=300)