Intel-ML-Week1

最新推荐文章于 2024-07-09 10:38:15 发布

qq_37491711

最新推荐文章于 2024-07-09 10:38:15 发布

阅读量188

点赞数

本文链接：https://blog.csdn.net/qq_37491711/article/details/104399386

版权

import pandas as pd
import numpy as np
# Daily step counts used throughout the Series walkthrough.
step_data = [3620, 7891, 9761,
             3907, 4338, 5373]
print(type(step_data))

# Wrap the list in a named Series; the index defaults to 0..5.
step_counts = pd.Series(step_data, name='steps')
print(step_counts)
print(type(step_counts))

# Re-index by calendar day: six consecutive days starting 2020-02-19.
step_counts.index = pd.date_range('20200219', periods=6)
print(step_counts)
print(step_counts['2020-02-23'])  # Look up by label, just like a dictionary
# Positional access goes through .iloc: plain [] with an integer on a
# non-integer index is deprecated in pandas 2.x.
print(step_counts.iloc[4])
print(step_counts.loc['2020-02'])  # A partial date string selects all of February

print(step_counts.dtypes)  # View the data type
# The np.float alias was removed in NumPy 1.24; the builtin float maps to
# the same float64 dtype.
step_counts = step_counts.astype(float)
print(step_counts.dtypes)
# np.NaN was removed in NumPy 2.0; np.nan is the supported spelling.
step_counts.iloc[1:3] = np.nan  # Create invalid data
step_counts = step_counts.fillna(0.)  # Now fill it in with zeros
print(step_counts.iloc[1:3])
print(step_counts)

# Cycling distance per day; None marks a missing measurement.
cycling_data = [10.7, 0, None, 2.4, 15.3, 10.9, 0, None]

# Pair each step count with the cycling distance at the same position.
# zip() stops at the shorter of its inputs, and list() materialises the
# result as a list of (steps, distance) tuples.
joined_data = list(zip(step_data, cycling_data))

# A DataFrame is a two-dimensional, spreadsheet-like table. Built without
# further arguments, rows and columns get default integer labels.
activity_df = pd.DataFrame(joined_data)
print(activity_df)

# Rebuild the frame with one row per calendar day and named columns.
activity_df = pd.DataFrame(
    joined_data,
    index=pd.date_range('20200219', periods=6),
    columns=['Walking', 'Cycling'],
)
print(activity_df)

# Row selection: .loc goes by index label, .iloc by integer position.
print(activity_df.loc['2020-2-19'])
print(activity_df.iloc[-3])

# Column selection: by name, as an attribute, or by integer position.
print(activity_df['Walking'])
print(activity_df.Walking)
print(activity_df.iloc[:, 0])
print(activity_df.iloc[:, 1])

# Location of the Iris data file, relative to the working directory.
filepath = 'data/Iris_Data.csv'
data = pd.read_csv(filepath)
print(data)

# Derived column: sepal length multiplied by sepal width.
data['sepal_area'] = data['sepal_length'] * data['sepal_width']
print(data.iloc[:5, -3:])  # first five rows, last three columns

# Shortened species label: apply() calls the lambda once per value and
# removes the text 'Iris-' from it. (There are other ways to do this.)
data['abbrev'] = data['species'].apply(lambda name: name.replace('Iris-', ''))
print(data.iloc[:5, -3:])

# Concatenate the first two and the last two rows.
small_data = pd.concat([data.iloc[:2], data.iloc[-2:]])
print(small_data.iloc[:, -3:])  # See the 'join' method for SQL-style joining of dataframes

# Number of rows per species; for a single Series, value_counts() does the same.
group_sizes = data.groupby('species').size()
print(group_sizes)

# `data` also holds text columns (species, abbrev). Since pandas 2.0 the
# frame-wide reductions no longer drop them silently and raise TypeError
# instead, so restrict mean/quantile to the numeric columns explicitly.
print(data.mean(numeric_only=True))  # Mean calculated on a dataframe
print(data.petal_length.median())  # Median calculated on a Series
print(data.petal_length.mode())  # Mode calculated on a Series
print(data.petal_length.std(),  # Standard deviation, variance, and SEM
      data.petal_length.var(),
      data.petal_length.sem())
print(data.quantile(0, numeric_only=True))  # As well as quantiles
# describe() summarises the data: count, mean, std, min, max and the
# 25% / 50% / 75% quantiles.
print(data.describe())

# Sample 5 rows without replacement; random_state makes the draw repeatable.
sample = data.sample(n=5, replace=False, random_state=42)
print(sample.iloc[:, -3:])

import matplotlib.pyplot as plt
# Scatter of sepal and petal measurements on shared axes. ls='' suppresses
# the connecting line so only the markers are drawn.
plt.figure()
plt.plot(data.sepal_length, data.sepal_width, ls='', marker='o', label='sepal')
plt.plot(data.petal_length, data.petal_width, ls='', marker='o', label='petal')
plt.legend()  # without a legend the label= arguments are never displayed
plt.show()

# Histogram of sepal length in 25 bins.
plt.hist(data.sepal_length, bins=25)
plt.show()

# Horizontal bar chart of the first ten sepal widths.
fig, ax = plt.subplots()
bar_positions = np.arange(10)
ax.barh(bar_positions, data.sepal_width.iloc[:10])
# Set position of ticks and tick labels. barh centres each bar on its y
# value by default, so the ticks go on the bar positions themselves rather
# than being shifted by 0.4.
ax.set_yticks(bar_positions)
ax.set_yticklabels(np.arange(1, 11))
ax.set(xlabel='xlabel', ylabel='ylabel', title='Title')
plt.show()

# Plot the per-species mean of every numeric column. numeric_only=True is
# required on pandas >= 2.0, where averaging the text column `abbrev`
# raises TypeError instead of being skipped silently.
(data.groupby('species')
     .mean(numeric_only=True)
     .plot(color=['red', 'blue', 'black', 'green'], fontsize=10.0, figsize=(4, 4)))
plt.show()

import seaborn as sns
# Joint distribution of sepal length and width. The figure-size keyword
# is `height`; the older name `size` was renamed in seaborn 0.9.
sns.jointplot(x='sepal_length', y='sepal_width', data=data, height=4)
plt.show()
# Pairwise scatter matrix of the columns, coloured by species.
sns.pairplot(data, hue='species', height=3)
plt.show()

qq_37491711

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫