import pandas as pd
import numpy as np
# Maps each supported city name (lower-case) to the CSV file holding its data.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Every city's CSV is loaded eagerly when the module is imported.
# NOTE(review): nothing in the visible code reads these three frames
# (select_data re-reads the CSV on each call) - confirm they are still needed.
CHICAGO = pd.read_csv(CITY_DATA['chicago'])
NEW_YORK_CITY = pd.read_csv(CITY_DATA['new york city'])
WASHINGTON = pd.read_csv(CITY_DATA['washington'])
def select_month(month, df):
    """Return the rows of ``df`` whose 'Start Time' falls in ``month``.

    Args:
        month: A month name from 'january' to 'june' (case-insensitive),
            or 'all' to skip filtering.
        df: DataFrame with a 'Start Time' column that ``pd.to_datetime``
            can parse.

    Returns:
        ``df`` itself when ``month`` is 'all'; otherwise a new DataFrame
        holding only the matching rows, with 'Start Time' converted to
        datetimes. The caller's frame is never modified.

    Raises:
        ValueError: if ``month`` is not 'all' or one of the supported names.
    """
    month = month.lower()
    if month == 'all':
        return df

    months = ['january', 'february', 'march', 'april', 'may', 'june']
    if month not in months:
        raise ValueError(
            "unknown month {!r}; expected 'all' or one of {}".format(month, months)
        )
    month_number = months.index(month) + 1

    # assign() builds a new frame, so the conversion does not leak back into
    # the DataFrame the caller passed in.
    converted = df.assign(**{'Start Time': pd.to_datetime(df['Start Time'])})
    return converted[converted['Start Time'].dt.month == month_number]
def select_day(day, df):
    """Return the rows of ``df`` whose 'Start Time' falls on weekday ``day``.

    Args:
        day: English weekday name such as 'Monday' (case-insensitive), or
            'all' / 'All' to skip filtering.
        df: DataFrame with a 'Start Time' column that ``pd.to_datetime``
            can parse.

    Returns:
        ``df`` itself when ``day`` means "all"; otherwise a new DataFrame
        holding only the matching rows, with 'Start Time' converted to
        datetimes. The caller's frame is never modified.
    """
    day = day.title()
    if day == 'All':
        return df

    start_times = pd.to_datetime(df['Start Time'])
    # assign() returns a fresh frame; writing the column in place would
    # modify the caller's data (and warn when ``df`` is itself a slice).
    converted = df.assign(**{'Start Time': start_times})
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() is the
    # supported replacement and yields the same English names.
    return converted[start_times.dt.day_name() == day]
def select_data(city, month, day):
    """Load one city's CSV and apply the requested month / day filters.

    Args:
        city: A key of ``CITY_DATA`` ('chicago', 'new york city' or
            'washington').
        month: Month name handed to ``select_month``, or 'all' (any case)
            for no month filter.
        day: Weekday name handed to ``select_day``, or 'all' (any case)
            for no day filter.

    Returns:
        The filtered DataFrame.

    Raises:
        ValueError: if ``city`` is not a key of ``CITY_DATA``.
    """
    print("city: {}, month: {}, day:{} ".format(city, month, day))

    # Fail with a clear message instead of letting read_csv choke on None.
    if city not in CITY_DATA:
        raise ValueError(
            "unknown city {!r}; expected one of {}".format(city, sorted(CITY_DATA))
        )
    df_city = pd.read_csv(CITY_DATA[city])

    # Compare case-insensitively so both 'all' and 'All' disable a filter.
    if month.lower() != 'all':
        df_city = select_month(month, df_city)
    if day.lower() != 'all':
        df_city = select_day(day, df_city)
    return df_city
def startt(df):
    """Print the month number that occurs most often in the 'Start Time' column."""
    month_counts = pd.to_datetime(df['Start Time']).dt.month.value_counts()
    # value_counts() sorts by frequency, so the first label is the top month.
    top_month = month_counts.index[0]
    message = "在开始时间中最常见的月份是:{} 月份"
    print(message.format(top_month))
def weekdayc(df):
    """Print the weekday name that occurs most often in the 'Start Time' column."""
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() is the
    # supported replacement and yields the same English names.
    weekday = pd.to_datetime(df['Start Time']).dt.day_name()
    # value_counts() sorts by frequency, so the first label is the top weekday.
    print("在开始时间中最常见的一周中的一天是:{}".format(weekday.value_counts().index[0]))
def hourc(df):
    """Print the hour of day that occurs most often in the 'Start Time' column."""
    hour_counts = pd.to_datetime(df['Start Time']).dt.hour.value_counts()
    # value_counts() sorts by frequency, so the first label is the top hour.
    busiest_hour = hour_counts.index[0]
    message = "在开始时间中最常见的一天中最常见的小时是:{}"
    print(message.format(busiest_hour))
def caculate_trip(df):
    """Print the sum and the mean of the 'Trip Duration' column."""
    durations = df['Trip Duration']
    total_duration = durations.sum()
    average_duration = durations.mean()
    print("总骑行时长是:{}".format(total_duration))
    print("平均骑行时长是:{}".format(average_duration))
def start_station(df):
    """Print the value that occurs most often in the 'Start Station' column."""
    station_counts = df['Start Station'].value_counts()
    # value_counts() sorts by frequency, so the first label is the top station.
    busiest = station_counts.index[0]
    print("最经常出现的开始车站是:{}".format(busiest) )
def end_station(df):
    """Print the value that occurs most often in the 'End Station' column."""
    station_counts = df['End Station'].value_counts()
    # value_counts() sorts by frequency, so the first label is the top station.
    busiest = station_counts.index[0]
    print("最热的结束车站是:{}".format(busiest))
def station_line(df):
    """Print the most frequent ('Start Station', 'End Station') pair."""
    pair_counts = df.groupby(['Start Station', 'End Station']).size()
    # idxmax() on the two-level index yields a (start, end) tuple.
    route = pair_counts.idxmax()
    origin = route[0]
    destination = route[1]
    print("最热门的路线是从 {} 到 {}".format(origin, destination))
def user_kinds_counts(df):
    """Print how many rows there are for each value of the 'User Type' column."""
    # The heading used to end in a literal "n" - a newline escape that had
    # lost its backslash - so a stray letter was printed after the colon.
    print("每种用户人数:\n")
    print(df['User Type'].value_counts())
def sex_kinds_counts(df):
    """Print the row count per 'Gender' value, or a notice if the column is absent."""
    # Guard clause: a frame without a 'Gender' column only gets the notice.
    if 'Gender' not in df.columns:
        print(" sorry,this city has NO DATA")
        return
    gender_counts = df['Gender'].value_counts()
    print("每种性别人数:")
    print(gender_counts)
def birthyear_statistics(df):
    """Print the earliest, latest and most frequent 'Birth Year' values.

    When ``df`` has no 'Birth Year' column, a notice is printed instead.
    """
    # Guard clause: a frame without the column only gets the notice.
    if 'Birth Year' not in df.columns:
        print(" sorry,this city has NO DATA ")
        return

    birth_years = df['Birth Year']
    latest = birth_years.max()
    earliest = birth_years.min()
    # value_counts() sorts by frequency, so the first label is the top year.
    most_common = birth_years.value_counts().index[0]

    report = (
        ("出生最早的年份是:{}", earliest),
        ("出生最晚的年份是:{}", latest),
        ("出生常见的年份是:{}", most_common),
    )
    for template, value in report:
        print(template.format(value))
def _prompt_choice(prompt, options, normalize):
    """Keep asking until the user types one of ``options``.

    ``prompt`` is printed before every read. The typed line is stripped of
    surrounding whitespace and passed through ``normalize`` before it is
    compared against ``options``. Returns the normalised choice.
    """
    while True:
        print(prompt)
        choice = normalize(input().strip())
        if choice in options:
            return choice


def main():
    """Run the interactive loop: ask for filters, then print every statistic.

    Each round first asks whether to continue (typing '0' exits, anything
    else goes on), then prompts for a city, a month and a weekday until a
    valid value is given, loads the filtered data with ``select_data`` and
    prints all reports for it.
    """
    cities = ('chicago', 'new york city', 'washington')
    months = ('january', 'february', 'march', 'april', 'may', 'june', 'all')
    days = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
            'Saturday', 'Sunday', 'All')

    while True:
        print("input '0' exits ,enter '1' go on")
        if input().strip() == '0':
            break

        # Local names deliberately differ from the module-level
        # select_month / select_day functions so they are not shadowed.
        city = _prompt_choice(
            "select a city,please input one of them: 'chicago' , 'new york city' or 'washington' : ",
            cities,
            str.lower,
        )
        month = _prompt_choice(
            "select a month,please input one of them: january ,february,march,april,may,june or all :",
            months,
            str.lower,
        )
        # Weekdays are matched in title case, so "all" becomes 'All'.
        day = _prompt_choice(
            "select one day,please input one of them:Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday or all:",
            days,
            str.title,
        )

        # Filter the data according to the user's answers.
        result = select_data(city, month, day)

        # Print the statistics.
        startt(result)
        weekdayc(result)
        hourc(result)
        caculate_trip(result)
        start_station(result)
        end_station(result)
        station_line(result)
        user_kinds_counts(result)
        sex_kinds_counts(result)
        birthyear_statistics(result)


if __name__ == "__main__":
    main()