import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pylab import mpl
# Make SimHei the sans-serif font matplotlib uses for text.
# NOTE(review): presumably chosen so Chinese characters in plot labels render
# correctly -- confirm the font is installed on the target machine.
mpl.rcParams['font.sans-serif'] = ['SimHei']
from datetime import datetime
import calendar
import seaborn as sn
# Load the training data from train.csv (path is relative to the current
# working directory) into a DataFrame.
bikedata = pd.read_csv('train.csv')
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns. This is a bare expression: its result is only displayed when it is
# the last statement of a notebook cell; in a plain script it is discarded.
bikedata.describe()
| | season | holiday | workingday | casual | registered | count |
|---|---|---|---|---|---|---|
| count | 10886.000000 | 10886.000000 | 10886.000000 | 10886.000000 | 10886.000000 | 10886.000000 |
| mean | 2.506614 | 0.028569 | 0.680875 | 36.021955 | 155.552177 | 191.574132 |
| std | 1.116174 | 0.166599 | 0.466159 | 49.960477 | 151.039033 | 181.144454 |
| min | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| 25% | 2.000000 | 0.000000 | 0.000000 | 4.000000 | 36.000000 | 42.000000 |
| 50% | 3.000000 | 0.000000 | 1.000000 | 17.000000 | 118.000000 | 145.000000 |
| 75% | 4.000000 | 0.000000 | 1.000000 | 49.000000 | 222.000000 | 284.000000 |
| max | 4.000000 | 1.000000 | 1.000000 | 367.000000 | 886.000000 | 977.000000 |
# Quick inspection of the loaded frame: its (rows, columns) shape, the first
# five rows, and the dtype of every column -- each item on its own line.
print(bikedata.shape, bikedata.head(), bikedata.dtypes, sep='\n')
(10886, 7)
datetime season holiday workingday casual registered count
0 2011/1/1 0:00 1 0 0 3 13 16
1 2011/1/1 1:00 1 0 0 8 32 40
2 2011/1/1 2:00 1 0 0 5 27 32
3 2011/1/1 3:00 1 0 0 3 10 13
4 2011/1/1 4:00 1 0 0 0 1 1
datetime object
season int64
holiday int64
workingday int64
casual int64
registered int64
count int64
dtype: object
# Add a 'date' column holding the part of each 'datetime' string that comes
# before the first run of whitespace (e.g. '2011/1/1 0:00' -> '2011/1/1').
bikedata['date'] = [stamp.split()[0] for stamp in bikedata['datetime']]
print(bikedata.head())
datetime season holiday workingday casual registered count \
0 2011/1/1 0:00 1 0 0 3 13 16
1 2011/1/1 1:00 1 0 0 8 32 40
2 2011/1/1 2:00 1 0 0 5 27 32
3 2011/1/1 3:00 1 0 0 3 10 13
4 2011/1/1 4:00 1 0 0 0 1 1
date
0 2011/1/1
1 2011/1/1
2 2011/1/1
3 2011/1/1
4 2011/1/1
# Add an 'hour' column: take the time part of each 'datetime' string (the
# second whitespace-separated token) and keep the text before the first ':'.
# The values remain strings ('0', '1', ...); no numeric conversion is done here.
bikedata['hour'] = [
    stamp.split()[1].partition(':')[0] for stamp in bikedata['datetime']
]
print(bikedata.head())
datetime season holiday workingday casual registered count \
0 2011/1/1 0:00 1 0 0 3 13 16
1 2011/1/1 1:00 1 0 0 8 32 40
2 2011/1/1 2:00 1 0 0 5 27 32
3 2011/1/1 3:00 1 0 0 3 10 13
4 2011/1/1 4:00 1 0 0 0 1 1
date hour
0 2011/1/1 0
1 2011/1/1 1
2 2011/1/1 2
3 2011/1/1 3
4 2011/1/1 4
# Add a 'weekday' column with the day name for each 'date' string.
# Each date is parsed with the '%Y/%m/%d' format, and its weekday index
# (Monday == 0) is looked up in calendar.day_name.
bikedata['weekday'] = [
    calendar.day_name[datetime.strptime(day_text, '%Y/%m/%d').weekday()]
    for day_text in bikedata['date']
]
print(bikedata.head())
datetime season holiday workingday casual registered count \
0 2011/1/1 0:00 1 0 0 3 13 16
1 2011/1/1 1:00 1 0 0 8 32 40
2 2011/1/1 2:00 1 0 0 5 2