import time
import pandas as pd
import numpy as np
import datetime as dt
# Maps each supported city name (lowercase, as typed by the user) to the CSV
# file that load_data() reads for that city.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}
def _ask_choice(first_prompt, retry_prompt, error_message, valid_choices):
    """Prompt repeatedly until the user types one of *valid_choices*.

    Args:
        (str) first_prompt - prompt shown for the first attempt
        (str) retry_prompt - prompt shown after an invalid answer
        (str) error_message - message printed after an invalid answer
        (tuple) valid_choices - accepted answers, all lowercase
    Returns:
        (str) the accepted answer, stripped and lowercased
    """
    # Normalise case and surrounding whitespace so " Chicago " is accepted.
    answer = input(first_prompt).strip().lower()
    while answer not in valid_choices:
        print(error_message)
        answer = input(retry_prompt).strip().lower()
    return answer


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each answer is re-requested until it matches one of the accepted values.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    cities = ('chicago', 'new york city', 'washington')
    months = ('all', 'january', 'february', 'march', 'april', 'may', 'june')
    days = ('all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
            'saturday', 'sunday')

    city = _ask_choice(
        "Which city do you want to inquiry(Ex:chicago, new york city, washington):",
        "Which city do you want to inquiry:",
        "Error city!Please try again!(Ex:chicago, new york city, washington)",
        cities,
    )

    # The month and day error messages used to say "Error city!"; each one
    # now names the field that was actually rejected.
    month = _ask_choice(
        "Which months do you want to inquiry(Ex:all, january, february, march, april, may, june):",
        "Which months do you want to inquiry:",
        "Error month!Please try again!(Ex:all, january, february, march, april, may, june)",
        months,
    )

    day = _ask_choice(
        "Which day of week do you want to inquiry(Ex:all, monday, tuesday, wednesday, thursday, friday, saturday, sunday):",
        "Which day of week do you want to inquiry:",
        "Error day!Please try again!(Ex:all, monday, tuesday, wednesday, thursday, friday, saturday, sunday)",
        days,
    )

    print('-'*40)
    return city, month, day
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of january..june
    """
    df = pd.read_csv(CITY_DATA[city])

    # Parse the timestamps once, then derive the two columns used for filtering.
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() returns the
    # same English day names ("Monday", "Tuesday", ...).
    df['day'] = df['Start Time'].dt.day_name()

    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        # Month names map to their 1-based calendar number.
        month_number = months.index(month) + 1
        df = df[df['month'] == month_number]

    if day != 'all':
        # Day names in the 'day' column are title-cased.
        df = df[df['day'] == day.title()]

    return df
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour of the trips,
    followed by the time the calculation took. The DataFrame passed in is
    not modified.

    Args:
        df - Pandas DataFrame whose 'Start Time' column holds datetime values
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # Work on a local Series instead of writing helper columns into the
    # caller's DataFrame.
    start_times = df['Start Time'].dropna()

    if start_times.empty:
        # mode() of an empty Series is empty, so indexing [0] would raise.
        print('No trips match the selected filters.')
    else:
        the_most_common_month = start_times.dt.month.mode()[0]
        print("Most Common Month:", the_most_common_month)

        # Series.dt.weekday_name was removed in pandas 1.0; use day_name().
        the_most_common_day_of_week = start_times.dt.day_name().mode()[0]
        print("Most Common Day Of Week:", the_most_common_day_of_week)

        the_most_common_start_hour = start_times.dt.hour.mode()[0]
        print("Most Common Start Hour:", the_most_common_start_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most frequent (start station, end station) pair, followed by the
    time the calculation took.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode()/idxmax() have nothing to return for an empty frame.
        print('No trips match the selected filters.')
    else:
        start_station = df['Start Station'].mode()
        print("Most Commonly Used Start Station:", start_station[0])

        end_station = df['End Station'].mode()
        print("Most Commonly Used End Station:", end_station[0])

        # Count each (start, end) pair directly. Grouping on both columns
        # avoids joining the names with a separator and splitting again,
        # which gave wrong results for station names containing "=".
        trip_counts = df.groupby(['Start Station', 'End Station']).size()
        top_start, top_end = trip_counts.idxmax()
        print('Most Frequent Combination Of Start Station And End Station Trip:',
              top_start, 'To', top_end)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Prints the sum and the mean of the 'Trip Duration' column, followed by
    the time the calculation took.

    Args:
        df - Pandas DataFrame with a numeric 'Trip Duration' column
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration']

    total_travel_time = durations.sum()
    print('Total Travel Time:', total_travel_time)

    mean_travel_time = durations.mean()
    print('Mean Travel Time:', mean_travel_time)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the counts per user type, the counts per gender, and the
    earliest, most recent and most common year of birth. The gender and
    birth-year sections are each replaced by a short notice when the
    corresponding column is missing (or, for birth year, has no values).

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    user_types = df['User Type'].value_counts()
    print(user_types)

    # Check for the optional columns explicitly instead of relying on a bare
    # except, which would also hide unrelated errors.
    if 'Gender' in df.columns:
        gender_types = df['Gender'].value_counts()
        print(gender_types)
    else:
        print('\nNo gender data available!')

    if 'Birth Year' in df.columns:
        birth_years = df['Birth Year'].dropna()
    else:
        birth_years = None

    if birth_years is not None and not birth_years.empty:
        # int() drops the ".0" that appears when the column is stored as float.
        earliest_birth = int(birth_years.min())
        recent_birth = int(birth_years.max())
        common_birth = int(birth_years.mode()[0])
        print('Earliest Year Of Birth:', earliest_birth)
        print('Most Recent Year Of Birth:', recent_birth)
        print('Most Common Year Of Birth:', common_birth)
    else:
        print('\nNo birth year data available!')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def main():
    """Run the interactive analysis loop.

    Each pass asks for the filters, loads the matching data and prints the
    four statistics reports. The loop repeats only while the user answers
    "yes" (case-insensitive, surrounding whitespace ignored) to the restart
    prompt.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        # Anything other than "yes" ends the session.
        if restart.strip().lower() != 'yes':
            break
# Start the interactive session only when the file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
# Project 2: Explore US bikeshare data
# (Web page footer: latest recommended article published 2021-01-03 16:55:27)