Series

最新推荐文章于 2023-05-12 01:07:40 发布

weixin_41656898

最新推荐文章于 2023-05-12 01:07:40 发布

阅读量353

点赞数

本文链接：https://blog.csdn.net/weixin_41656898/article/details/103505467

版权

import pandas as pd
import numpy as np

# A Series can be created from an array or a list

L = [1,4,2,5]  # the data values
I = 'A B dog c'.split()  # string labels, used further down as a custom index
print(L)
print(I)

[1, 4, 2, 5]
['A', 'B', 'dog', 'c']

S = pd.Series(L)
type(S)

pandas.core.series.Series

S # A Series is a one-dimensional array. By default pandas labels it 0..n-1, but a custom index can be supplied instead

0    1
1    4
2    2
3    5
dtype: int64

W=pd.Series(I,index=[1,2,'a','b'])
W

1      A
2      B
a    dog
b      c
dtype: object

2*S # only the values are scaled; the index is unchanged

0     2
1     8
2     4
3    10
dtype: int64

np.sqrt(S)

0    1.000000
1    2.000000
2    1.414214
3    2.236068
dtype: float64

S[1]

S[1:]

1    4
2    2
3    5
dtype: int64

S.values

array([1, 4, 2, 5], dtype=int64)

S.index

RangeIndex(start=0, stop=4, step=1)

W.index

Index([1, 2, 'a', 'b'], dtype='object')

S = pd.Series(L, index = I)
S

A      1
B      4
dog    2
c      5
dtype: int64

# Fetch the same element by position and by label.
# .iloc is used for the positional lookup: a bare integer key on a
# string-labelled Series (S[2]) relied on a positional fallback that recent
# pandas versions deprecate and no longer perform.
S.iloc[2]

S['dog']

S.iloc[2]==S['dog']

True

S.get('c')

S>2

A      False
B       True
dog    False
c       True
dtype: bool

y = S[S>2]# boolean mask: keep only the entries where the condition is True
y

B    4
c    5
dtype: int64

S = S.drop('dog')
S

A    1
B    4
c    5
dtype: int64

np.log(S)

A    0.000000
B    1.386294
c    1.609438
dtype: float64

#You can add to a Series
S['cat']= 5
S

A      1
B      4
c      5
cat    5
dtype: int64

# You can check for empty or missing values
pd.isnull(S)

A      False
B      False
c      False
cat    False
dtype: bool

#You can change values
S['A'] =99
S

A      99
B       4
c       5
cat     5
dtype: int64

'cat' in S# `in` tests the index labels only, not the values

True

'5' in S

False

type(S)

pandas.core.series.Series

W

1      A
2      B
a    dog
b      c
dtype: object

'dog'in W

False

'a' in W

True

S**2

A      9801
B        16
c        25
cat      25
dtype: int64

S.sort_values()

B       4
c       5
cat     5
A      99
dtype: int64

S.sort_values(ascending=False)

A      99
cat     5
c       5
B       4
dtype: int64

S[S>8] # keep only the values greater than 8

A    99
dtype: int64

S.argmax()# returns the label of the maximum in this run, but that behaviour is deprecated (see the FutureWarning in the output); use S.idxmax() to get the label

D:\python\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: 
The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  """Entry point for launching an IPython kernel.





'A'

S['D']= np.nan
S

A      99.0
B       4.0
c       5.0
cat     5.0
D       NaN
dtype: float64

pd.isnull(S)

A      False
B      False
c      False
cat    False
D       True
dtype: bool

y = S[pd.isnull(S)]
y

D   NaN
dtype: float64

z = S[pd.notnull(S)]
z

A      99.0
B       4.0
c       5.0
cat     5.0
dtype: float64

J = ['A', 'B', 'cat', 'd']
I = 'A B dog c'.split()
L = [1,2,3,4]
M = [ 4,5,6,7]

S = pd.Series(L, index = I)
T = pd. Series(M, index = J)
print(S)

A      1
B      2
dog    3
c      4
dtype: int64

print(T)

A      4
B      5
cat    6
d      7
dtype: int64

mysum = S+T # values are added only for labels present in both Series; every other label gets NaN (missing value)
mysum

A      5.0
B      7.0
c      NaN
cat    NaN
d      NaN
dog    NaN
dtype: float64

a=mysum[pd.isnull(mysum)]
a

c     NaN
cat   NaN
d     NaN
dog   NaN
dtype: float64

np.log(mysum)

A      1.609438
B      1.945910
c           NaN
cat         NaN
d           NaN
dog         NaN
dtype: float64

S1 = pd.Series([1,3,5,7,9], index = 'A B C D E'.split())
S1

A    1
B    3
C    5
D    7
E    9
dtype: int64

S1.index.name = 'team'
S1

team
A    1
B    3
C    5
D    7
E    9
dtype: int64

S2 = S1[S1>6]
S2

team
D    7
E    9
dtype: int64

S3 = S1+ S2
S3

team
A     NaN
B     NaN
C     NaN
D    14.0
E    18.0
dtype: float64

S3.name = 'played both'
S3

team
A     NaN
B     NaN
C     NaN
D    14.0
E    18.0
Name: played both, dtype: float64

s = pd.Series([12, 5, -6, 19], index ='a b c d'.split())
s

a    12
b     5
c    -6
d    19
dtype: int64

# select the second element in two ways
# By position: use .iloc, because a bare integer key on a string-labelled
# Series (s[1]) depends on a positional fallback that recent pandas versions
# deprecate and no longer perform.
s.iloc[1]

# By label.
s['b']

# Create a numpy array
arr = np.array([2,4,6,8])

# use this to create a Series
s1 = pd.Series(arr)

#Change the numpy array
arr[1] = 99
arr

array([ 2, 99,  6,  8])

# s1 shows the change that was made to arr (see the output below)
s1

0     2
1    99
2     6
3     8
dtype: int32

s1[2]=88 # the write through s1 shows up in arr as well (see the output below)
arr

array([ 2, 99, 88,  8])

s = pd.Series(['A', 1.3, 2], index =['letter', 'float', 'int'])
s

letter      A
float     1.3
int         2
dtype: object

type(s)

pandas.core.series.Series

s*2

letter     AA
float     2.6
int         4
dtype: object

np.log(s)

---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-106-c523cd7bd073> in <module>
----> 1 np.log(s)


AttributeError: 'str' object has no attribute 'log'

s= s.drop('letter') # drop removes entries by index label
s

float    1.3
int        2
dtype: object

s= s.astype(float)# convert the values to float and rebind s to the result
s

float    1.3
int      2.0
dtype: float64

s.dtype# inspect the data type

dtype('float64')

np.log(s)

float    0.262364
int      0.693147
dtype: float64

s = pd.Series([[1,2], [3,4], [5,6]])
s

0    [1, 2]
1    [3, 4]
2    [5, 6]
dtype: object

s[0]

[1, 2]

s[1:2]

1    [3, 4]
dtype: object

s[0][0]

# What if the series index has repeated names
srepeat = pd.Series([1,0,2,1,2,3], index=['white','white','blue','green','green','yellow'])
srepeat

white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

# What are the distinct values stored in the Series?
srepeat.unique() # list the distinct values

array([1, 0, 2, 3], dtype=int64)

# Count how often each distinct value occurs (e.g. the value 2 occurs twice)
srepeat.value_counts() # occurrences of each distinct value

2    2
1    2
3    1
0    1
dtype: int64

b = pd.Series([1,2,3,1,2,1])
b

0    1
1    2
2    3
3    1
4    2
5    1
dtype: int64

b.unique()

array([1, 2, 3], dtype=int64)

bcnt = b.value_counts()
bcnt

1    3
2    2
3    1
dtype: int64

bcnt.dtype

dtype('int64')

type(bcnt)

pandas.core.series.Series

bcnt.astype(float)

1    3.0
2    2.0
3    1.0
dtype: float64

bcnt.dtype # still int64: the astype(float) result above was not assigned, so bcnt itself is unchanged; assign the result to a variable to keep it

dtype('int64')

z=bcnt.astype(float)

z.dtype

dtype('float64')

bcnt.index =['one', 'two', 'three'] # replace the index labels
bcnt

one      3
two      2
three    1
dtype: int64

#Can convert Series into Python dictionary
# The index becomes the key.
test1 = pd.Series([85.0,42,35,21,40],index=['Peter','Paul','Mary','Frank','Jos'])
test1

Peter    85.0
Paul     42.0
Mary     35.0
Frank    21.0
Jos      40.0
dtype: float64

test1_dict = test1.to_dict() # convert to a dict
test1_dict

{'Peter': 85.0, 'Paul': 42.0, 'Mary': 35.0, 'Frank': 21.0, 'Jos': 40.0}

# The dict can be converted back into a Series
test1_Series = pd.Series(test1_dict)# build a Series from the dict again
test1_Series

Peter    85.0
Paul     42.0
Mary     35.0
Frank    21.0
Jos      40.0
dtype: float64

weixin_41656898

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫