Python中Series数据类型的describe、index、del、append、loc、广播、迭代

最新推荐文章于 2024-07-08 19:59:20 发布

程志伟

最新推荐文章于 2024-07-08 19:59:20 发布

阅读量862

点赞数

文章标签： python

本文链接：https://blog.csdn.net/c1z2w3456789/article/details/104266318

版权

关注微信号：小程在线

关注CSDN博客：程志伟的博客

Python 3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]

Type "copyright", "credits" or "license" for more information.

IPython 7.6.1 -- An enhanced Interactive Python.

import numpy as np

import pandas as pd

#### 1.1创建一个Series类型的数据 ####

price = pd.Series([150,488,192,689,410,266])

price

Out[1]:

0 150

1 488

2 192

3 689

4 410

5 266

dtype: int64

#查看数据的类型

type(price)

Out[2]: pandas.core.series.Series

price = pd.Series([150,488,192,689,410,266],name="price")

price

Out[3]:

0 150

1 488

2 192

3 689

4 410

5 266

Name: price, dtype: int64

temp = {'Mon': 33, 'Tue': 19, 'Wed': 15, 'Thu': 89, 'Fri': 11, 'Sat': -5, 'Sun': 9}

pd.Series(temp)

Out[4]:

Mon 33

Tue 19

Wed 15

Thu 89

Fri 11

Sat -5

Sun 9

dtype: int64

#简单的统计描述

price.mean()

Out[5]: 365.8333333333333

price.sum()

Out[6]: 2195

price.head(2)

Out[7]:

0 150

1 488

Name: price, dtype: int64

#取最后3个数

price.tail(3)

Out[8]:

3 689

4 410

5 266

Name: price, dtype: int64

#### 1.2添加索引 ####

price = pd.Series([15280,45888,15692,55689,28410,27566.3],

index=['wh','sh','hz','bj','gz','nj'])

price

Out[9]:

wh 15280.0

sh 45888.0

hz 15692.0

bj 55689.0

gz 28410.0

nj 27566.3

dtype: float64

#查看索引

price.index

Out[10]: Index(['wh', 'sh', 'hz', 'bj', 'gz', 'nj'], dtype='object')

#查看索引对应的数值

price['sh']

Out[12]: 45888.0

#给索引添加名字

price.index.name='city'

price

Out[13]:

city

wh 15280.0

sh 45888.0

hz 15692.0

bj 55689.0

gz 28410.0

nj 27566.3

dtype: float64

#用日期作为索引

dates=pd.date_range('2016-01-01','2016-6-01',freq='M')

dates

Out[14]:

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',

'2016-05-31'],

dtype='datetime64[ns]', freq='M')

tempature=pd.Series([13,15,20,27,29],index=dates)

tempature

Out[15]:

2016-01-31 13

2016-02-29 15

2016-03-31 20

2016-04-30 27

2016-05-31 29

Freq: M, dtype: int64

#索引是可以重复的

temp=pd.Series([13,15,20,27,29],index=[0,2,2,3,4])

temp

Out[16]:

0 13

2 15

2 20

3 27

4 29

dtype: int64

temp=pd.Series([13,15,20,27,29])

temp

Out[17]:

0 13

1 15

2 20

3 27

4 29

dtype: int64

temp[0]

Out[18]: 13

temp.loc[0:3] #按索引标签切片，包含结束标签3

Out[23]:

0 13

1 15

2 20

3 27

dtype: int64

temp.iloc[0:3] #按位置顺序切片，不包含结束位置3

Out[24]:

0 13

1 15

2 20

dtype: int64

temp=pd.Series([13,15,20,27,29],index=['M','T','W','T','F'])

temp

Out[25]:

M 13

T 15

W 20

T 27

F 29

dtype: int64

temp['T']

Out[26]:

T 15

T 27

dtype: int64

temp.M

Out[27]: 13

#### 1.3数据的修改与删除 ####

price

Out[28]:

city

wh 15280.0

sh 45888.0

hz 15692.0

bj 55689.0

gz 28410.0

nj 27566.3

dtype: float64

price['wh']=16000

price

Out[29]:

city

wh 16000.0

sh 45888.0

hz 15692.0

bj 55689.0

gz 28410.0

nj 27566.3

dtype: float64

price.iloc[0]=15280

price

Out[30]:

city

wh 15280.0

sh 45888.0

hz 15692.0

bj 55689.0

gz 28410.0

nj 27566.3

dtype: float64

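#append返回一个新的Series，不改变原来的数据（注意：Series.append在pandas 1.4中已弃用、2.0中已移除，新版本请改用pd.concat([price, pd.Series([9500],index=['cd'])])）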

price.append(pd.Series([9500],index=['cd']))

Out[31]:

wh 15280.0

sh 45888.0

hz 15692.0

bj 55689.0

gz 28410.0

nj 27566.3

cd 9500.0

dtype: float64

price

Out[32]:

city

wh 15280.0

sh 45888.0

hz 15692.0

bj 55689.0

gz 28410.0

nj 27566.3

dtype: float64

#增加新的数据

price['cd']=9500

price

Out[34]:

city

wh 15280.0

sh 45888.0

hz 15692.0

bj 55689.0

gz 28410.0

nj 27566.3

cd 9500.0

dtype: float64

#使用set_value更改已有的数据（注意：set_value自pandas 0.21起已弃用、1.0中已移除，新版本请改用price.at['bj']=62000）

price.set_value('bj',62000)

Out[35]:

city

wh 15280.0

sh 45888.0

hz 15692.0

bj 62000.0

gz 28410.0

nj 27566.3

cd 9500.0

dtype: float64

#使用del删除数据

del price['nj']

price

Out[37]:

city

wh 15280.0

sh 45888.0

hz 15692.0

bj 62000.0

gz 28410.0

cd 9500.0

dtype: float64

#使用布尔值筛选数据

price[price>50000]

Out[38]:

city

bj 62000.0

dtype: float64

#### 1.4数据的描述 ####

price.describe()

Out[40]:

count 6.000000

mean 29461.666667

std 20585.747940

min 9500.000000

25% 15383.000000

50% 22051.000000

75% 41518.500000

max 62000.000000

dtype: float64

#取25%，50%的分位数

price.describe(percentiles=[0.25,0.5])

Out[41]:

count 6.000000

mean 29461.666667

std 20585.747940

min 9500.000000

25% 15383.000000

50% 22051.000000

max 62000.000000

dtype: float64

#对非数值型数据统计

temp=pd.Series(['hw','apple','vivo','mi','hw','oppo','samsung','vivo'],dtype='category')

temp

Out[42]:

0 hw

1 apple

2 vivo

3 mi

4 hw

5 oppo

6 samsung

7 vivo

dtype: category

Categories (6, object): [apple, hw, mi, oppo, samsung, vivo]

temp.value_counts()

Out[43]:

vivo 2

hw 2

samsung 1

oppo 1

mi 1

apple 1

dtype: int64

#### 1.5向量化操作与广播 ####

#将数据乘以2

price * 2

Out[44]:

city

wh 30560.0

sh 91776.0

hz 31384.0

bj 124000.0

gz 56820.0

cd 19000.0

dtype: float64

#将数据增加1000

price+1000

Out[45]:

city

wh 16280.0

sh 46888.0

hz 16692.0

bj 63000.0

gz 29410.0

cd 10500.0

dtype: float64

#将数据取log

np.log(price)

Out[46]:

city

wh 9.634300

sh 10.733959

hz 9.660906

bj 11.034890

gz 10.254496

cd 9.159047

dtype: float64

#list类型数据

li=[2000,3000,3000]

#数据乘以2，表示数据重复2遍

li*2

Out[48]: [2000, 3000, 3000, 2000, 3000, 3000]

s=pd.Series([10,20,30,40])

Out[49]:

0 10

1 20

2 30

3 40

dtype: int64

s2=pd.Series([10,20,30],index=[2,3,4])

Out[50]:

2 10

3 20

4 30

dtype: int64

#相同索引的才进行计算

s+s2

Out[51]:

0 NaN

1 NaN

2 40.0

3 60.0

4 NaN

dtype: float64

#改变数据的类型

s.apply(float)

Out[52]:

0 10.0

1 20.0

2 30.0

3 40.0

dtype: float64

s.astype(int)

Out[53]:

0 10

1 20

2 30

3 40

dtype: int32

#### 1.6数据的迭代 ####

for num in s:

print(num)

#对Series使用in判断的是索引而不是数值；数值存放在values里面

40 in s

Out[55]: False

40 in s.values

Out[56]: True

#0表示索引0

0 in s #Series是key-value存储，in判断的是key（索引），所以这里实际判断的是index 0

Out[57]: True

#li是list类型的数据

li=[10,20,30,40]

#40是li的值

40 in li

Out[58]: True

程志伟

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫