# Intel-ML-Week1

import pandas as pd
import numpy as np
# Daily step counts for six consecutive days.
step_data = [3620, 7891, 9761,
             3907, 4338, 5373]
print(type(step_data))

# Wrap the list in a named Series; until an index is assigned it is 0..5.
step_counts = pd.Series(step_data, name='steps')
print(step_counts)
print(type(step_counts))

# Replace the default integer index with six daily timestamps.
step_counts.index = pd.date_range('20200219', periods=6)
print(step_counts)
print(step_counts['2020-02-23'])  # Label lookup, just like a dictionary
# Positional lookup goes through .iloc: plain [] with an integer on a
# non-integer index is deprecated in pandas 2.x.
print(step_counts.iloc[4])
print(step_counts['2020-02'])  # Partial date string selects all of February
# [inline image placeholder lost in extraction]

# [inline image placeholder lost in extraction]

 

print(step_counts.dtypes)  # View the data type
# np.float was removed in NumPy 1.24; the builtin float maps to the same dtype.
step_counts = step_counts.astype(float)  # Convert to a float
print(step_counts.dtypes)
# np.NaN was removed in NumPy 2.0; np.nan is the supported spelling.
# .iloc makes it explicit that 1:3 is a positional slice (rows 1 and 2).
step_counts.iloc[1:3] = np.nan  # Create invalid data
step_counts = step_counts.fillna(0.)  # Now fill it in with zeros
print(step_counts.iloc[1:3])
print(step_counts)
# [inline image placeholder lost in extraction]

# [inline image placeholder lost in extraction]

# Cycling distance per day; None marks a missing reading.
cycling_data = [10.7, 0, None, 2.4, 15.3, 10.9, 0, None]

# zip() pairs up corresponding elements and stops at the shorter input, so the
# eight cycling values are cut down to the six days that have step counts.
# Each pair is a tuple (immutable); the pairs are collected into a list.
joined_data = [pair for pair in zip(step_data, cycling_data)]

# A DataFrame is a two-dimensional table, similar to a spreadsheet: cells can
# hold numbers, strings, etc., and both column names and row labels can be set.
activity_df = pd.DataFrame(joined_data)
print(activity_df)

# Rebuild the frame with daily row labels and named columns.
activity_dates = pd.date_range('20200219', periods=6)
activity_df = pd.DataFrame(
    joined_data,
    index=activity_dates,
    columns=['Walking', 'Cycling'],
)
print(activity_df)

# .loc selects a row by its index label.
print(activity_df.loc['2020-2-19'])
# .iloc selects a row by its integer position (here: third from the end).
print(activity_df.iloc[-3])

# A column can be reached by name, either with [] or as an attribute.
print(activity_df['Walking'])
print(activity_df.Walking)

# Columns by integer position: first, then second.
for column_position in (0, 1):
    print(activity_df.iloc[:, column_position])
# [inline image placeholder lost in extraction]

# [inline image placeholder lost in extraction]

filepath = 'data/Iris_Data.csv'  # File location of the data file
data = pd.read_csv(filepath)  # Import the data
print(data.head())  # Print a few rows (head() keeps this to the first five)

# Create a new column that is the product of both sepal measurements.
data['sepal_area'] = data.sepal_length * data.sepal_width
print(data.iloc[:5, -3:])  # Print a few rows and the last three columns

# Strip the 'Iris-' prefix from every species name. The vectorized string
# accessor replaces a per-row lambda; regex=False treats the prefix literally.
data['abbrev'] = data.species.str.replace('Iris-', '', regex=False)
print(data.iloc[:5, -3:])  # Note that there are other ways to accomplish the above
# [inline image placeholder lost in extraction]

# [inline image placeholder lost in extraction]

# Concatenate the first two and the last two rows into one small frame.
small_data = pd.concat([data.iloc[:2], data.iloc[-2:]])
print(small_data.iloc[:, -3:])  # See the 'join' method for SQL-style joining of dataframes

# Row count per species. size() works on a grouped dataframe; for a single
# Series, the value_counts method gives the counts instead.
group_sizes = data.groupby('species').size()
print(group_sizes)

# numeric_only=True skips the string columns (species, abbrev); without it
# pandas 2.0+ raises TypeError instead of silently dropping them.
print(data.mean(numeric_only=True))  # Mean calculated on a dataframe
print(data.petal_length.median())  # Median calculated on a Series
print(data.petal_length.mode())  # Mode calculated on a Series
print(data.petal_length.std(),  # Standard deviation, variance, and SEM
      data.petal_length.var(),
      data.petal_length.sem())
print(data.quantile(0, numeric_only=True))  # As well as quantiles
# describe() summarizes each numeric column: non-null count, mean, standard
# deviation, min, max and the 25% / 50% / 75% quantiles.
print(data.describe())
# [inline image placeholder lost in extraction]

# [inline image placeholder lost in extraction]

# Draw five rows without replacement; the fixed random_state makes the draw
# repeatable from run to run.
sample = data.sample(
    n=5,
    replace=False,
    random_state=42,
)
# Show only the last three columns of the sampled rows.
print(sample.iloc[:, -3:])
# [inline image placeholder lost in extraction]

# [inline image placeholder lost in extraction]

import matplotlib.pyplot as plt
# Scatter plot of width against length, one marker series for sepals and one
# for petals (ls='' suppresses the connecting line).
plt.figure()
plt.plot(data.sepal_length, data.sepal_width, ls='', marker='o', label='sepal')
plt.plot(data.petal_length, data.petal_width, ls='', marker='o', label='petal')
plt.legend()  # Without a legend the label= arguments are never displayed
plt.show()

# Histogram of sepal length in 25 bins.
plt.hist(data.sepal_length, bins=25)
plt.show()

 

 

# Horizontal bar chart of the first ten sepal widths.
fig, ax = plt.subplots()
bar_positions = np.arange(10)
ax.barh(bar_positions, data.sepal_width.iloc[:10])
# Set position of ticks and tick labels. Bars are centred on their y position
# by default, so the ticks sit on the same positions rather than 0.4 above.
ax.set_yticks(bar_positions)
ax.set_yticklabels(np.arange(1, 11))  # Label the bars 1..10
ax.set(xlabel='xlabel', ylabel='ylabel', title='Title')
plt.show()

 

 

# Per-species mean of every numeric column, drawn as one line per column.
# numeric_only=True leaves out the string column 'abbrev', which pandas 2.0+
# would otherwise refuse to average (TypeError).
species_means = data.groupby('species').mean(numeric_only=True)
species_means.plot(
    color=['red', 'blue', 'black', 'green'],
    fontsize=10.0,
    figsize=(4, 4),
)
plt.show()

 

import seaborn as sns
# Joint distribution of sepal length and width. seaborn renamed the figure-size
# keyword from `size` to `height` in 0.9; the old name is no longer accepted.
sns.jointplot(x='sepal_length', y='sepal_width', data=data, height=4)
plt.show()

# Pairwise relationships between the numeric columns, coloured by species.
sns.pairplot(data, hue='species', height=3)
plt.show()

 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值