# 自行车实例
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (15,5)
%matplotlib inline
# Load the bikes CSV: ';'-separated, latin1-encoded, with the 'Date' column
# parsed as datetimes and used as the row index.
# NOTE(review): if the file stores dates day-first (dd/mm/yyyy), pass
# dayfirst=True here -- confirm against the data file.
fixed_df = pd.read_csv(
    'Desktop/bikes.csv',
    encoding='latin1',
    sep=';',
    parse_dates=['Date'],
    index_col='Date',
)
fixed_df  # notebook display of the whole table
fixed_df['Berri 1']  # notebook display of a single column
fixed_df['Berri 1'].plot()
fixed_df.plot(figsize=(15,20))

# Work on an independent copy so adding a column does not touch fixed_df.
berri_bikes = fixed_df[['Berri 1']].copy()
berri_bikes.head(10)
berri_bikes.index.weekday
# DatetimeIndex.weekday numbers the days 0 (Monday) through 6 (Sunday).
berri_bikes.loc[:,'weekday'] = berri_bikes.index.weekday
berri_bikes

# Total of 'Berri 1' per weekday. GroupBy.sum() is used instead of passing
# the Python builtin `sum` to aggregate(), which newer pandas warns about.
weekday_counts = berri_bikes.groupby('weekday').sum()
weekday_counts

# Label each row from its own weekday number rather than assigning a fixed
# seven-item list, so data that lacks some weekday still gets correct labels
# instead of failing with a length mismatch.
weekday_names = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
weekday_counts.index = [weekday_names[day] for day in weekday_counts.index]
weekday_counts
weekday_counts.plot(kind='bar')
# 两地销售数据的比较
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
'Desktop/data.txt'  # bare expression: only echoes the path in a notebook cell
# Read the comma-separated file as text so every field can be inspected as
# written. The builtin `str` replaces the `np.str` alias, which NumPy
# deprecated in 1.20 and removed in 1.24.
data = np.loadtxt('Desktop/data.txt', delimiter=',', dtype=str)
data
def convert_data_to_timeseries(input_file,column,verbose=False):
    """Load one column of a comma-separated file as a month-end time series.

    The first two columns of the file are read as the year and the month of
    each row. The index is generated from the first and last row only, so the
    rows are expected to be consecutive months.

    Args:
        input_file: Anything ``np.loadtxt`` accepts (a path, a file object,
            or an iterable of text lines).
        column: Zero-based index of the column that holds the values.
        verbose: Accepted for backward compatibility; currently unused.

    Returns:
        A ``pd.Series`` with the selected column as values, indexed by
        month-end timestamps from the first row's month to the last row's.

    Raises:
        ValueError: If the input has no rows, or if the number of rows does
            not equal the number of months between the first and last row.
    """
    # Load the data; ndmin=2 keeps a single-row file two-dimensional so the
    # [row, column] indexing below still works.
    data = np.loadtxt(input_file, delimiter=',', ndmin=2)
    if data.size == 0:
        raise ValueError('input_file contains no data rows')

    # Determine the start and end of the index.
    first_year, first_month = int(data[0, 0]), int(data[0, 1])
    last_year, last_month = int(data[-1, 0]), int(data[-1, 1])
    start_date = f'{first_year}-{first_month}'
    # The end bound is the first day of the month after the last row, so the
    # final month-end generated belongs to the last row's month. The year
    # rolls over only when the last row is December (12 // 12 == 1).
    end_date = f'{last_year + last_month // 12}-{last_month % 12 + 1}'

    # An explicit MonthEnd offset avoids the version-dependent 'M' alias.
    dates = pd.date_range(start_date, end_date, freq=pd.offsets.MonthEnd())
    data_timeseries = pd.Series(data[:, column], index=dates)
    return data_timeseries
# Inspect the data: plot the third column (location A) to see whether it
# shows any trend.
input_file = 'Desktop/data.txt'
column_num = 2
data_timeseries = convert_data_to_timeseries(input_file,column_num)

# Plot the whole series.
data_timeseries.plot()
# NOTE(review): the title is the literal text 'input_file', not the value of
# the input_file variable -- confirm which one is intended.
plt.title('input_file')

# The full plot is too dense, so zoom in on a shorter time range.
start = '2007-2'
end = '2007-11'
plt.figure()
data_timeseries[start:end].plot()
# Spaces around 'to' keep the two dates from running together in the title.
plt.title('Data from ' + start + ' to ' + end)
plt.show()
# Bring in the second sales series and combine both Series into a DataFrame
# indexed by time, which makes the two easier to compare.
data1, data2 = (
    convert_data_to_timeseries(input_file, series_column)
    for series_column in (2, 3)
)
dataframe = pd.DataFrame(dict(first=data1, second=data2))

# Draw both series on the same axes.
overlap_window = dataframe.loc['1955':'1960']
overlap_window.plot()
plt.title('Data overlapped on top of each other')

# Plot the difference between locations A and B on a new figure to see
# whether the two series follow the same trend.
plt.figure()
difference_window = dataframe.loc['1952':'1955']
difference = difference_window['first'] - difference_window['second']
difference.plot()
plt.title('Difference (first - second)')
plt.show()
# iris数据集的分类与预测,使用sklearn导入iris数据集
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split

# Load the iris dataset and the handwritten-digits dataset.
iris = datasets.load_iris()
digits = datasets.load_digits()

# Get familiar with the loaded structure through its .data and .target
# attributes.
for iris_part in (iris.data, iris.target):
    print(iris_part)

# Split the data with sklearn: 40% is held out for testing, and the fixed
# random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.4,
    random_state=0,
)

# Choose the model and train it; fit() returns the estimator itself, so the
# construction and training can be chained.
svc = svm.SVC(gamma='auto').fit(X_train, Y_train)

# Predict the class of one sample with the trained model.
sample = [[5.84,4.4,6.9,2.5]]
print(svc.predict(sample))