项目二:探索美国共享单车数据

import time
import pandas as pd
import numpy as np
import datetime as dt

# Maps each supported city name (lower-case) to the CSV file that is
# passed to pandas.read_csv for that city.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def _ask_choice(first_prompt, retry_prompt, error_message, choices):
    """Prompt on stdin until the user enters one of ``choices``.

    The answer is stripped and lower-cased before it is compared, so
    ``" Chicago "`` is accepted as ``"chicago"``.

    Args:
        first_prompt (str): prompt shown for the first attempt.
        retry_prompt (str): prompt shown after every rejected attempt.
        error_message (str): line printed when an answer is rejected.
        choices (tuple of str): accepted lower-case answers.

    Returns:
        str: the accepted answer, lower-cased.
    """
    answer = input(first_prompt).strip().lower()
    while answer not in choices:
        print(error_message)
        answer = input(retry_prompt).strip().lower()
    return answer


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until a valid answer is typed; matching is
    case-insensitive and ignores surrounding whitespace.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    city = _ask_choice(
        "Which city do you want to inquiry(Ex:chicago, new york city, washington):",
        "Which city do you want to inquiry:",
        "Error city!Please try again!(Ex:chicago, new york city, washington)",
        ('chicago', 'new york city', 'washington'),
    )

    # Only january..june are offered as month filters.
    month = _ask_choice(
        "Which months do you want to inquiry(Ex:all, january, february, march, april, may, june):",
        "Which months do you want to inquiry:",
        # The original reported "Error city!" here, which misled the user.
        "Error month!Please try again!(Ex:all, january, february, march, april, may, june)",
        ('all', 'january', 'february', 'march', 'april', 'may', 'june'),
    )

    day = _ask_choice(
        "Which day of week do you want to inquiry(Ex:all, monday, tuesday, wednesday, thursday, friday, saturday, sunday):",
        "Which day of week do you want to inquiry:",
        # Same copy-paste fix as for the month message.
        "Error day!Please try again!(Ex:all, monday, tuesday, wednesday, thursday, friday, saturday, sunday)",
        ('all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
         'saturday', 'sunday'),
    )

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Two helper columns are added to the frame before filtering: ``month``
    (month number of 'Start Time') and ``day`` (weekday name of 'Start Time').

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of january..june
    """
    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() returns
    # the same capitalised English names ("Monday", ...).
    df['day'] = df['Start Time'].dt.day_name()

    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        # List position + 1 is the calendar month number.
        month_number = months.index(month) + 1
        df = df[df['month'] == month_number]

    if day != 'all':
        # day arrives lower-case; the 'day' column holds title-case names.
        df = df[df['day'] == day.title()]

    return df


def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month number, weekday name and start hour of
    the 'Start Time' column, followed by the time the computation took.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column. It is
            only read; no columns are added to it.
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # Work on derived Series instead of writing scratch columns into the
    # caller's (possibly filtered) frame.
    start_times = df['Start Time'].dt

    # display the most common month
    the_most_common_month = start_times.month.mode()[0]
    print("Most Common Month:", the_most_common_month)

    # display the most common day of week
    # (dt.weekday_name was removed in pandas 1.0; day_name() replaces it)
    the_most_common_day_of_week = start_times.day_name().mode()[0]
    print("Most Common Day Of Week:", the_most_common_day_of_week)

    # display the most common start hour
    the_most_common_start_hour = start_times.hour.mode()[0]
    print("Most Common Start Hour:", the_most_common_start_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most frequent (start station, end station) pair, followed by the
    time the computation took.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    start_station = df['Start Station'].mode()
    print("Most Commonly Used Start Station:", start_station[0])

    # display most commonly used end station
    end_station = df['End Station'].mode()
    print("Most Commonly Used End Station:", end_station[0])

    # display most frequent combination of start station and end station trip
    # Count the pairs directly: this is computed once (the original rebuilt
    # the same Series once per column) and does not rely on a "=" separator
    # that a station name could itself contain.
    pair_counts = df.groupby(['Start Station', 'End Station']).size()
    trip_start, trip_end = pair_counts.idxmax()
    print('Most Frequent Combination Of Start Station And End Station Trip:',
          trip_start, 'To', trip_end)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Prints the sum and the mean of the 'Trip Duration' column, followed by
    the time the computation took.

    Args:
        df - Pandas DataFrame with a numeric 'Trip Duration' column.
    """

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration']

    # display total travel time
    total_travel_time = durations.sum()
    print('Total Travel Time:', total_travel_time)

    # display mean travel time
    mean_travel_time = durations.mean()
    print('Mean Travel Time:', mean_travel_time)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the counts per user type. When the frame has a 'Gender' column
    its counts are printed; when it has a 'Birth Year' column with at least
    one non-missing value, the earliest, most recent and most common birth
    year are printed. A short notice is printed for whichever of the two is
    unavailable, instead of hiding every error behind a bare ``except``.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
            'Gender' and 'Birth Year' columns.
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_types = df['User Type'].value_counts()
    print(user_types)

    # Display counts of gender
    if 'Gender' in df.columns:
        gender_types = df['Gender'].value_counts()
        print(gender_types)
    else:
        print('\nNo data of gender for this city!')

    # Display earliest, most recent, and most common year of birth
    birth_years = None
    if 'Birth Year' in df.columns:
        # Drop missing values so an all-NaN column is reported as "no data"
        # rather than failing on mode()[0].
        birth_years = df['Birth Year'].dropna()

    if birth_years is not None and not birth_years.empty:
        earliest_birth = birth_years.min()
        recent_birth = birth_years.max()
        common_birth = birth_years.mode()[0]
        print('Earliest Year Of Birth:', earliest_birth)
        print('Most Recent Year Of Birth:', recent_birth)
        print('Most Common Year Of Birth:', common_birth)
    else:
        print('\nNo data of birth year for this city!')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def main():
    """Run the interactive analysis loop.

    Each pass asks for the filters, loads the matching data and prints the
    four statistics reports. The loop ends unless the user answers "yes"
    (case-insensitive) to the restart question.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


# Start the interactive session only when the file is run as a script.
if __name__ == "__main__":
    main()
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Cyclistic 是一家虚构的公司,提供自行车共享设施。该公司有两大类骑手:休闲骑手和年度会员。休闲骑手是使用单程通行证和全天通行证的人。年度会员有一年的通行证可以使用自行车。 描述性分析 我已经分别使用每个月的数据透视表进行了初步分析。 数据透视表 1:这描述了每个工作日的骑行次数和平均骑行时长。 数据透视表 2:这分析了每种可骑行类型(即停放自行车、经典自行车和电动自行车)的骑行次数和平均骑行长度。 数据透视表 3:它比较了每个成员类型(即休闲和年度)的乘车次数和平均乘车时间。 数据透视表 4:这显示了整个月的平均和最大骑行长度。 数据透视表 5:该表记录了每个起点站的骑行次数和平均骑行长度。 我已经整理了每个月的这些摘要,并放在一个电子表格中,以确定每月和季节性的分析。我发现的一些有趣的事实和趋势如下: 最长的骑行发生在 2021 年 6 月至 7 月,创下大约 38 天的记录! 一年中的几个月,即 5 月到 9 月,自行车骑行的次数很高,7 月本身记录了 822k 的峰值。 11 月至 2 月见证了低乘车人数。2 月创下了最低的降幅,仅有 5 万次骑行。 Streeter Dr & Grand Ave 是最受欢迎的起点站,一个月内乘坐次数高达 15,000 次。 每月比较 其他一些重要的观察结果是: 经典自行车是最受欢迎的选择,每 3 次骑行中就有 2 次使用经典自行车。 停放自行车是骑行超过 1 小时的人的最爱。 休闲骑手的平均骑行长度高于年度会员。几乎是后者的两倍。 周六和周日的骑行次数最多。这两天的平均骑行时间也创下了历史新高。 使用 SQL 进行年视图分析 我将使用 SQL 合并所有的月度数据来做一个整体的年度分析。我将使用 Microsoft SQL Server 来执行所有数据库操作。 所有分步查询都可以在这里找到bikesharecasestudy.sql。请务必查看bikeShareCaseStudyReport.docx以查看所有重要数据和输出。 该分析揭示了年度会员和休闲骑手之间的一些有趣趋势。以下是分析。 年末会员的乘车份额增加(超过 50%),加上临时会员的乘车份额下降。最大值记录在 1 月 (80%)。 休闲骑手的平均骑行时长是年度骑手的两倍多。 年度会员在一年内记录的乘车次数多于休闲骑手。 周六的骑行次数和平均骑行时长最高,其次是周六和周五。周末生意兴隆。 经典自行车是这两种骑手中最喜欢的类型。停靠自行车是最不受欢迎的。此外,休闲会员选择经典自行车进行长途骑行。 临时会员通常更喜欢周末而不是工作日。年度车手则相反,他们在所有日子里的人数都很高,周日大幅下降。 使用 R 进行年视图分析 对于使用 R 的分析,我使用了 4 个季度的数据 - 2019 年的 Q2、Q3、Q4 和 2020 年的 Q1 。可以在此处找到用于清理、合并、分析和可视化的脚本R_script.R。 分享 此阶段是将要向利益相关者展示的重要和相关信息可视化。 年度会员和休闲骑手之间最显着的区别之一是他们在工作日的运营模式。 有报告

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值