import pandas as pd
import urllib.request
import shutil
import zipfile
import os
# Prepare an empty ``bike_data`` working directory for the download.
#
# ``os.system`` reports failure through its return value and never raises, so
# a try/except around it cannot detect an already-existing directory: a
# leftover ``bike_data`` would silently keep its stale contents. Removing any
# existing path first and then creating the directory yields a fresh, empty
# directory on every run, without depending on a shell.
if os.path.isdir('bike_data') and not os.path.islink('bike_data'):
    shutil.rmtree('bike_data')
elif os.path.lexists('bike_data'):
    # A plain file or symlink with this name would make makedirs fail.
    os.remove('bike_data')
os.makedirs('bike_data')
# Download the Bike-Sharing-Dataset archive from the URL below and unpack it
# into the ``bike_data`` directory (which must already exist).
data_source = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip'
zipname = 'bike_data/Bike-Sharing-Dataset.zip'
urllib.request.urlretrieve(data_source, zipname)
# The context manager closes the archive even when extraction raises, so the
# file handle is never leaked.
with zipfile.ZipFile(zipname, 'r') as zip_ref:
    zip_ref.extractall('bike_data')
# Load the extracted daily CSV, convert ``dteday`` to datetimes and discard
# the columns named in ``drop_list``.
daily_path = 'bike_data/day.csv'
drop_list = ['instant', 'season', 'yr', 'mnth', 'holiday', 'workingday', 'weathersit', 'atemp', 'hum']
daily_data = (
    pd.read_csv(daily_path)
    # assign() overwrites the existing column in place, so column order is kept.
    .assign(dteday=lambda frame: pd.to_datetime(frame['dteday']))
    .drop(columns=drop_list)
)
# Bare expression: shows the first rows when run as a notebook cell.
daily_data.head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|   | dteday     | weekday | temp     | windspeed | casual | registered | cnt  |
|---|------------|---------|----------|-----------|--------|------------|------|
| 0 | 2011-01-01 | 6       | 0.344167 | 0.160446  | 331    | 654        | 985  |
| 1 | 2011-01-02 | 0       | 0.363478 | 0.248539  | 131    | 670        | 801  |
| 2 | 2011-01-03 | 1       | 0.196364 | 0.248309  | 120    | 1229       | 1349 |
| 3 | 2011-01-04 | 2       | 0.200000 | 0.160296  | 108    | 1454       | 1562 |
| 4 | 2011-01-05 | 3       | 0.226957 | 0.186900  | 82     | 1518       | 1600 |
from __future__ import division, print_function
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib
# Global matplotlib styling, applied one rc group at a time: 14x7 figures,
# 14pt font, no top/right axis spines, no grid, white axes background.
for _rc_group, _rc_options in (
    ('figure', {'figsize': (14, 7)}),
    ('font', {'size': 14}),
    ('axes.spines', {'top': False, 'right': False}),
    ('axes', {'grid': False, 'facecolor': 'white'}),
):
    matplotlib.rc(_rc_group, **_rc_options)
def scatterplot(x_data, y_data, x_label, y_label, title):
fig, ax = plt.subplots()
ax.scatter(x_data, y_data, s = 10, color = '#539caf', alpha =