数模经验-数据处理-pandas

最新推荐文章于 2024-03-23 22:24:44 发布

熊熊想读研究生

最新推荐文章于 2024-03-23 22:24:44 发布

阅读量87

点赞数

文章标签： pandas

本文链接：https://blog.csdn.net/kingirlder/article/details/134368108

版权

数模经验-数据处理-pandas

代码的具体解释：下次补上

import pandas as pd
import numpy as np

# # Configure pandas display options (max columns/rows shown and output width)
# pd.set_option('display.max_columns', 10)
# pd.set_option('display.max_rows', 100)
# pd.set_option('display.width', 100)

Series基本操作

# Build a Series from a plain list; no index is given, so pandas
# assigns the default integer index.
obj = pd.Series(data=[4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

# The Series' data as a NumPy array (index labels are not included).
obj.values

array([ 4,  7, -5,  3], dtype=int64)

# No index was supplied, so this is the default RangeIndex.
obj.index

RangeIndex(start=0, stop=4, step=1)

# Same values as before, but with explicit string labels as the index.
obj2 = pd.Series(
    data=[4, 7, -5, 3],
    index=["d", "b", "a", "c"],
)
obj2

d    4
b    7
a   -5
c    3
dtype: int64

# The custom string labels passed at construction time.
obj2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

# Look up a single value by its index label.
obj2["a"]

-5

# A list of labels selects several entries, returned in the order requested.
obj2[['c', 'a', 'd']]

c    3
a   -5
d    4
dtype: int64

# Boolean-mask filtering: keep entries whose value is positive; labels are preserved.
obj2[obj2>0]

d    4
b    7
c    3
dtype: int64

# Element-wise scalar multiplication; the index labels stay attached.
obj2 * 2

d     8
b    14
a   -10
c     6
dtype: int64

# NumPy ufuncs apply element-wise and return a Series with the same index.
np.exp(obj2)

d      54.598150
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

"b" in obj2

True

# Membership test against the underlying value array (not the index labels).
4 in obj2.values

True

# Create a Series from a dict: the keys become the index labels.
sdata = {
    "Ohio": 35000,
    "Texas": 71000,
    "Oregon": 16000,
    "Utah": 5000,
}
obj3 = pd.Series(data=sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

# Passing an explicit index selects and orders the dict entries by label.
# 'California' is not a key of sdata, so it becomes NaN; 'Utah' is not in
# states, so it is left out of the result.
states =['California', 'Ohio', 'Oregon', 'Texas']
obj4 =pd.Series(sdata,index=states)
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

# Flag missing entries: True where the value is NaN.
pd.isnull(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

# Inverse check: True where a value is present.
pd.notnull(obj4)

California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

# Method form of the same missing-value check.
obj4.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

# Redisplay obj3 (the Series built from the dict).
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

# Redisplay obj4 (the Series built with the explicit state index).
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

# Arithmetic aligns on index labels; a label missing or NaN in either
# operand ('California', 'Utah') yields NaN in the result.
obj3+obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

# Both the Series and its index can carry a name; both appear in the repr.
obj4.name="population"
obj4.index.name="state"
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

# Redisplay obj, which still has its default integer index.
obj

0    4
1    7
2   -5
3    3
dtype: int64

# Assigning to .index relabels the Series in place; the values are unchanged.
obj.index= ['Bob','Steve','Jeff', 'Ryan']
obj

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64

DataFrame基本操作

""""Dataframe"""

'"Dataframe'

# Build a DataFrame from a dict of equal-length lists; each key becomes a column.
data = {
    "state": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
    "year": [2000, 2001, 2002, 2001, 2002, 2003],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
frame = pd.DataFrame(data=data)
frame

	state	year	pop
0	Ohio	2000	1.5
1	Ohio	2001	1.7
2	Ohio	2002	3.6
3	Nevada	2001	2.4
4	Nevada	2002	2.9
5	Nevada	2003	3.2

# Reorder the columns alphabetically (pop, state, year).
frame = frame.reindex(columns=frame.columns.sort_values())
frame

	pop	state	year
0	1.5	Ohio	2000
1	1.7	Ohio	2001
2	3.6	Ohio	2002
3	2.4	Nevada	2001
4	2.9	Nevada	2002
5	3.2	Nevada	2003

# Passing columns= fixes the column order of the resulting DataFrame.
pd.DataFrame(data, columns=['year', 'state', 'pop'])

	year	state	pop
0	2000	Ohio	1.5
1	2001	Ohio	1.7
2	2002	Ohio	3.6
3	2001	Nevada	2.4
4	2002	Nevada	2.9
5	2003	Nevada	3.2

# Sort the rows by the values in the 'year' column; each row keeps its
# original index label.
frame=frame.sort_values(by='year')
frame

	pop	state	year
0	1.5	Ohio	2000
1	1.7	Ohio	2001
3	2.4	Nevada	2001
2	3.6	Ohio	2002
4	2.9	Nevada	2002
5	3.2	Nevada	2003

# Build a DataFrame with explicit row labels. 'debt' is not a key of
# `data`, so that column is created and filled with NaN.
frame2 = pd.DataFrame(
    data,
    columns=["year", "state", "pop", "debt"],
    index=["one", "two", "three", "four", "five", "six"],
)
frame2

	year	state	pop	debt
one	2000	Ohio	1.5	NaN
two	2001	Ohio	1.7	NaN
three	2002	Ohio	3.6	NaN
four	2001	Nevada	2.4	NaN
five	2002	Nevada	2.9	NaN
six	2003	Nevada	3.2	NaN

# Dict-style column access returns the column as a Series.
frame2["state"]

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

# Attribute-style column access; returns the same kind of Series.
frame.year

0    2000
1    2001
3    2001
2    2002
4    2002
5    2003
Name: year, dtype: int64

# Select one row by label with .loc; because the columns have mixed
# types, .values is an object-dtype array.
frame2.loc['three'].values

array([2002, 'Ohio', 3.6, nan], dtype=object)

# Add / overwrite a column; Series assignment is matched on index labels.
# A scalar fills the whole column; the next line replaces it with six values.
frame2['debt'] = 16.5
frame2['debt'] = np.arange(6.)
# Assigning a Series aligns on the row labels: rows without a matching
# label ('one', 'three', 'six') end up as NaN.
val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
frame2

	year	state	pop	debt
one	2000	Ohio	1.5	NaN
two	2001	Ohio	1.7	-1.2
three	2002	Ohio	3.6	NaN
four	2001	Nevada	2.4	-1.5
five	2002	Nevada	2.9	-1.7
six	2003	Nevada	3.2	NaN

# Add a boolean column that is True for rows whose state is 'Ohio'.
frame2["eastern"] = frame2["state"] == "Ohio"
frame2

	year	state	pop	debt	eastern
one	2000	Ohio	1.5	NaN	True
two	2001	Ohio	1.7	-1.2	True
three	2002	Ohio	3.6	NaN	True
four	2001	Nevada	2.4	-1.5	False
five	2002	Nevada	2.9	-1.7	False
six	2003	Nevada	3.2	NaN	False

# Delete a column in place.
del frame2['eastern']
frame2

	year	state	pop	debt
one	2000	Ohio	1.5	NaN
two	2001	Ohio	1.7	-1.2
three	2002	Ohio	3.6	NaN
four	2001	Nevada	2.4	-1.5
five	2002	Nevada	2.9	-1.7
six	2003	Nevada	3.2	NaN

# Position-based slicing: first three rows and first two columns (end exclusive).
frame2.iloc[0:3,0:2]

	year	state
one	2000	Ohio
two	2001	Ohio
three	2002	Ohio

# Label-based slicing with .loc: both end labels are included in the result.
frame2.loc["one":"three","year":"state"]

	year	state
one	2000	Ohio
two	2001	Ohio
three	2002	Ohio

# With the default integer index, ser[-1] is looked up as the *label* -1
# rather than "the last position", so this cell raises KeyError: -1.
# NOTE(review): ser.iloc[-1] would be the position-based way to get the last element.
ser = pd.Series(np.arange(3.))
ser[-1]

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

File d:\Anaconda3\envs\guoguo\lib\site-packages\pandas\core\indexes\range.py:391, in RangeIndex.get_loc(self, key, method, tolerance)
    390 try:
--> 391     return self._range.index(new_key)
    392 except ValueError as err:


ValueError: -1 is not in range

The above exception was the direct cause of the following exception:


KeyError                                  Traceback (most recent call last)

d:\ai_py_3.9\常用代码\pfda\chapter5.ipynb Cell 38 line 2
      <a href='vscode-notebook-cell:/d%3A/ai_py_3.9/%E5%B8%B8%E7%94%A8%E4%BB%A3%E7%A0%81/pfda/chapter5.ipynb#X51sZmlsZQ%3D%3D?line=0'>1</a> ser = pd.Series(np.arange(3.))
----> <a href='vscode-notebook-cell:/d%3A/ai_py_3.9/%E5%B8%B8%E7%94%A8%E4%BB%A3%E7%A0%81/pfda/chapter5.ipynb#X51sZmlsZQ%3D%3D?line=1'>2</a> ser[-1]


File d:\Anaconda3\envs\guoguo\lib\site-packages\pandas\core\series.py:981, in Series.__getitem__(self, key)
    978     return self._values[key]
    980 elif key_is_scalar:
--> 981     return self._get_value(key)
    983 if is_hashable(key):
    984     # Otherwise index.get_value will raise InvalidIndexError
    985     try:
    986         # For labels that don't resolve as scalars like tuples and frozensets


File d:\Anaconda3\envs\guoguo\lib\site-packages\pandas\core\series.py:1089, in Series._get_value(self, label, takeable)
   1086     return self._values[label]
   1088 # Similar to Index.get_value, but we do not fall back to positional
-> 1089 loc = self.index.get_loc(label)
   1090 return self.index._get_values_for_loc(self, loc, label)


File d:\Anaconda3\envs\guoguo\lib\site-packages\pandas\core\indexes\range.py:393, in RangeIndex.get_loc(self, key, method, tolerance)
    391         return self._range.index(new_key)
    392     except ValueError as err:
--> 393         raise KeyError(key) from err
    394 self._check_indexing_error(key)
    395 raise KeyError(key)


KeyError: -1

# Use .iloc for position-based access. With a non-integer index, plain
# ser2[-1] only works through a positional fallback of Series.__getitem__
# that is deprecated since pandas 2.1; .iloc[-1] is explicit and gives the
# same result (2.0).
ser2 = pd.Series(np.arange(3.), index=['a', 'b', 'c'])
ser2.iloc[-1]

2.0

# Two float Series whose indexes only partly overlap ('a', 'c', 'e' are shared).
s1 = pd.Series(
    data=[7.3, -2.5, 3.4, 1.5],
    index=["a", "c", "d", "e"],
)
s2 = pd.Series(
    data=[-2.1, 3.6, -1.5, 4, 3.1],
    index=["a", "c", "e", "f", "g"],
)
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

# Display s2; the integer 4 was stored as 4.0 because the Series is float64.
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

# Addition aligns on labels; labels present in only one operand
# ('d', 'f', 'g') yield NaN.
s1+s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

# Two float DataFrames whose row and column labels only partly overlap.
df1 = pd.DataFrame(
    np.arange(9.0).reshape((3, 3)),
    index=["Ohio", "Texas", "Colorado"],
    columns=["b", "c", "d"],
)
df2 = pd.DataFrame(
    np.arange(12.0).reshape((4, 3)),
    index=["Utah", "Ohio", "Texas", "Oregon"],
    columns=["b", "d", "e"],
)
df1

	b	c	d
Ohio	0.0	1.0	2.0
Texas	3.0	4.0	5.0
Colorado	6.0	7.0	8.0

# Display df2 (4 rows, columns b/d/e).
df2

	b	d	e
Utah	0.0	1.0	2.0
Ohio	3.0	4.0	5.0
Texas	6.0	7.0	8.0
Oregon	9.0	10.0	11.0

# DataFrame addition aligns on both row and column labels; only cells
# present in both frames get a value, everything else is NaN.
df1 + df2

	b	c	d	e
Colorado	NaN	NaN	NaN	NaN
Ohio	3.0	NaN	6.0	NaN
Oregon	NaN	NaN	NaN	NaN
Texas	9.0	NaN	12.0	NaN
Utah	NaN	NaN	NaN	NaN

# Two single-column frames with the same row index but different column names.
df1 = pd.DataFrame(data={"A": [1, 2]})
df2 = pd.DataFrame(data={"B": [3, 4]})
df1

	A
0	1
1	2

# Display df2 (single column 'B').
df2

	B
0	3
1	4

# The frames share no column labels, so every cell of the aligned result is NaN.
df1-df2

	A	B
0	NaN	NaN
1	NaN	NaN

# list() on a string splits it into single characters.
list("afs")

['a', 'f', 's']

熊熊想读研究生

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫