数模经验-数据处理-pandas

数模经验-数据处理-pandas

代码的具体解释:下次补上

import pandas as pd
import numpy as np
# # 设置pandas显示选项
# pd.set_option('display.max_columns', 10)
# pd.set_option('display.max_rows', 100)
# pd.set_option('display.width', 100)

Series基本操作

obj=pd.Series([4,7,-5,3])
obj
0    4
1    7
2   -5
3    3
dtype: int64
obj.values
array([ 4,  7, -5,  3], dtype=int64)
obj.index
RangeIndex(start=0, stop=4, step=1)
obj2=pd.Series([4,7,-5,3],index=['d', 'b', 'a', 'c'])
obj2
d    4
b    7
a   -5
c    3
dtype: int64
obj2.index
Index(['d', 'b', 'a', 'c'], dtype='object')
obj2["a"]
-5
obj2[['c', 'a', 'd']]
c    3
a   -5
d    4
dtype: int64
obj2[obj2>0]
d    4
b    7
c    3
dtype: int64
obj2 * 2
d     8
b    14
a   -10
c     6
dtype: int64
np.exp(obj2)
d      54.598150
b    1096.633158
a       0.006738
c      20.085537
dtype: float64
"b" in obj2
True
4 in obj2.values
True
# 通过字典创建Series
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)
obj3
Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64
states =['California', 'Ohio', 'Oregon', 'Texas']
obj4 =pd.Series(sdata,index=states)
obj4
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64
pd.isnull(obj4)
California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool
pd.notnull(obj4)
California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool
obj4.isnull()
California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool
obj3
Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64
obj4
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64
obj3+obj4
California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64
obj4.name="population"
obj4.index.name="state"
obj4
state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64
obj
0    4
1    7
2   -5
3    3
dtype: int64
obj.index= ['Bob','Steve','Jeff', 'Ryan']
obj
Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64

DataFrame基本操作

""""Dataframe"""
'"Dataframe'
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
        'year': [2000, 2001, 2002, 2001, 2002, 2003],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)
frame
    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
5  Nevada  2003  3.2
frame=pd.DataFrame(frame, columns=frame.columns.sort_values())
frame
   pop   state  year
0  1.5    Ohio  2000
1  1.7    Ohio  2001
2  3.6    Ohio  2002
3  2.4  Nevada  2001
4  2.9  Nevada  2002
5  3.2  Nevada  2003
pd.DataFrame(data, columns=['year', 'state', 'pop'])
   year   state  pop
0  2000    Ohio  1.5
1  2001    Ohio  1.7
2  2002    Ohio  3.6
3  2001  Nevada  2.4
4  2002  Nevada  2.9
5  2003  Nevada  3.2
# 排序 按列
frame=frame.sort_values(by='year')
frame
   pop   state  year
0  1.5    Ohio  2000
1  1.7    Ohio  2001
3  2.4  Nevada  2001
2  3.6    Ohio  2002
4  2.9  Nevada  2002
5  3.2  Nevada  2003
frame2 = pd.DataFrame(data, columns=['year', 'state', 'pop', 'debt'],
                         index=['one', 'two', 'three', 'four',
                        'five', 'six'])
frame2
       year   state  pop  debt
one    2000    Ohio  1.5   NaN
two    2001    Ohio  1.7   NaN
three  2002    Ohio  3.6   NaN
four   2001  Nevada  2.4   NaN
five   2002  Nevada  2.9   NaN
six    2003  Nevada  3.2   NaN
frame2["state"]
one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object
frame.year
0    2000
1    2001
3    2001
2    2002
4    2002
5    2003
Name: year, dtype: int64
frame2.loc['three'].values
array([2002, 'Ohio', 3.6, nan], dtype=object)
# 添加列,匹配式添加
frame2['debt'] = 16.5
frame2['debt'] = np.arange(6.)
val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
frame2
       year   state  pop  debt
one    2000    Ohio  1.5   NaN
two    2001    Ohio  1.7  -1.2
three  2002    Ohio  3.6   NaN
four   2001  Nevada  2.4  -1.5
five   2002  Nevada  2.9  -1.7
six    2003  Nevada  3.2   NaN
frame2["eastern"]=frame2.state == 'Ohio'
frame2
       year   state  pop  debt  eastern
one    2000    Ohio  1.5   NaN     True
two    2001    Ohio  1.7  -1.2     True
three  2002    Ohio  3.6   NaN     True
four   2001  Nevada  2.4  -1.5    False
five   2002  Nevada  2.9  -1.7    False
six    2003  Nevada  3.2   NaN    False
# 删除列
del frame2['eastern']
frame2
       year   state  pop  debt
one    2000    Ohio  1.5   NaN
two    2001    Ohio  1.7  -1.2
three  2002    Ohio  3.6   NaN
four   2001  Nevada  2.4  -1.5
five   2002  Nevada  2.9  -1.7
six    2003  Nevada  3.2   NaN
frame2.iloc[0:3,0:2]
       year state
one    2000  Ohio
two    2001  Ohio
three  2002  Ohio
frame2.loc["one":"three","year":"state"]
       year state
one    2000  Ohio
two    2001  Ohio
three  2002  Ohio
ser = pd.Series(np.arange(3.))
ser[-1]
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

File d:\Anaconda3\envs\guoguo\lib\site-packages\pandas\core\indexes\range.py:391, in RangeIndex.get_loc(self, key, method, tolerance)
    390 try:
--> 391     return self._range.index(new_key)
    392 except ValueError as err:


ValueError: -1 is not in range

The above exception was the direct cause of the following exception:


KeyError                                  Traceback (most recent call last)

d:\ai_py_3.9\常用代码\pfda\chapter5.ipynb Cell 38 line 2
      1 ser = pd.Series(np.arange(3.))
----> 2 ser[-1]


File d:\Anaconda3\envs\guoguo\lib\site-packages\pandas\core\series.py:981, in Series.__getitem__(self, key)
    978     return self._values[key]
    980 elif key_is_scalar:
--> 981     return self._get_value(key)
    983 if is_hashable(key):
    984     # Otherwise index.get_value will raise InvalidIndexError
    985     try:
    986         # For labels that don't resolve as scalars like tuples and frozensets


File d:\Anaconda3\envs\guoguo\lib\site-packages\pandas\core\series.py:1089, in Series._get_value(self, label, takeable)
   1086     return self._values[label]
   1088 # Similar to Index.get_value, but we do not fall back to positional
-> 1089 loc = self.index.get_loc(label)
   1090 return self.index._get_values_for_loc(self, loc, label)


File d:\Anaconda3\envs\guoguo\lib\site-packages\pandas\core\indexes\range.py:393, in RangeIndex.get_loc(self, key, method, tolerance)
    391         return self._range.index(new_key)
    392     except ValueError as err:
--> 393         raise KeyError(key) from err
    394 self._check_indexing_error(key)
    395 raise KeyError(key)


KeyError: -1
ser2=pd.Series(np.arange(3.),index=['a','b','c'])
ser2[-1]

2.0
s1=pd.Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])
s2=pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])
s1
a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64
s2
a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64
s1+s2
a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64
df1=pd.DataFrame(np.arange(9.).reshape((3,3)),columns=list('bcd'),index=['Ohio','Texas','Colorado'])
df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'),index=['Utah', 'Ohio', 'Texas', 'Oregon'])
df1
            b    c    d
Ohio      0.0  1.0  2.0
Texas     3.0  4.0  5.0
Colorado  6.0  7.0  8.0
df2
          b     d     e
Utah    0.0   1.0   2.0
Ohio    3.0   4.0   5.0
Texas   6.0   7.0   8.0
Oregon  9.0  10.0  11.0
df1 + df2
            b   c     d   e
Colorado  NaN NaN   NaN NaN
Ohio      3.0 NaN   6.0 NaN
Oregon    NaN NaN   NaN NaN
Texas     9.0 NaN  12.0 NaN
Utah      NaN NaN   NaN NaN
df1 = pd.DataFrame({'A': [1, 2]})

df2 = pd.DataFrame({'B': [3, 4]})
df1
   A
0  1
1  2
df2
   B
0  3
1  4
df1-df2
    A   B
0 NaN NaN
1 NaN NaN
list("afs")
['a', 'f', 's']

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
使用Python进行数学建模和数据处理是一个非常受欢迎的选择。Python具有丰富的库和工具,可以帮助我们处理大量的数据,并进行各种数据分析和可视化。引用中提到了Python可以用于数学建模、编写软件、制作网站甚至控制单片机。这些功能使得Python成为一个非常全能且灵活的工具。 对于数学建模和数据处理,Python的一些常用库包括NumPy、SciPy、scikit-learn、pandas、matplotlib、seaborn等。引用列出了一些常用的库。NumPy提供了高性能的多维数组对象和一些用于处理数组的函数,SciPy则提供了一些科学计算的工具和算法。scikit-learn是一个用于机器学习的Python库,可以帮助我们进行分类、回归、聚类等任务。pandas提供了高效的数据结构和数据分析工具,可以方便地进行数据清洗、处理和分析。而matplotlib和seaborn则是用于数据可视化的库,可以帮助我们创建各种图表和图形来展示数据。 使用Python进行数学建模和数据处理的好处是多方面的。首先,Python是一门易学易用的语言,对于初学者来说比较友好。其次,Python拥有庞大的社区和丰富的资源,你可以在网上找到大量的学习资料和解决方案。此外,Python具有广泛的应用领域,无论是科学研究、金融分析、工程建模还是数据科学,Python都能发挥重要的作用。最后,Python还可以与其他语言和工具进行集成,因此可以与其他领域的专业软件和工具进行无缝对接。 总结起来,使用Python进行数学建模和数据处理是一个非常灵活和强大的选择。Python的丰富库和工具使得数据处理变得更加高效和便捷,而Python的易学易用的特性也使得它成为初学者的首选。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

熊熊想读研究生

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值