什么是pandas
pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.
常用数据类型
Series:一维、带标签的数组
DataFrame:二维,Series 的容器
In [3]:
import pandas as pd
In [4]:
from pandas import Series,DataFrame
In [5]:
import numpy as np
import string
In [10]:
# Build a 10-element integer Series labelled with the first ten uppercase letters.
t = pd.Series(np.arange(10), index=list(string.ascii_uppercase)[:10])
t
Out[10]:
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int32
In [11]:
type(t)
Out[11]:
pandas.core.series.Series
In [12]:
Series(data=np.arange(10),index=list(string.ascii_uppercase[:10]))
Out[12]:
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int32
In [13]:
# Map each of the first ten uppercase letters to its position in the alphabet.
a = {
    letter: position
    for position, letter in enumerate(string.ascii_uppercase[:10])
}
a
Out[13]:
{'A': 0,
'B': 1,
'C': 2,
'D': 3,
'E': 4,
'F': 5,
'G': 6,
'H': 7,
'I': 8,
'J': 9}
In [14]:
pd.Series(a)
Out[14]:
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int64
In [31]:
pd.Series(a, index=list(string.ascii_uppercase[5:15]))
# NaN ("not a number") is a float: labels that are not keys of `a` become NaN,
# so the result dtype is float64 instead of an integer dtype.
Out[31]:
F 5.0
G 6.0
H 7.0
I 8.0
J 9.0
K NaN
L NaN
M NaN
N NaN
O NaN
dtype: float64
In [32]:
t
Out[32]:
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int32
In [33]:
t[2:10:2]
Out[33]:
C 2
E 4
G 6
I 8
dtype: int32
In [34]:
# Use .iloc for positional access: `t` has string labels, and treating a bare
# integer key in [] as a position is deprecated in recent pandas.
t.iloc[1]
Out[34]:
1
In [35]:
# Select by position with .iloc: `t` has string labels, and a list of integers
# in [] is only resolved positionally through a deprecated fallback.
t.iloc[[2, 3, 6]]
Out[35]:
C 2
D 3
G 6
dtype: int32
In [36]:
t>4
Out[36]:
A False
B False
C False
D False
E False
F True
G True
H True
I True
J True
dtype: bool
In [37]:
t[t>4]
Out[37]:
F 5
G 6
H 7
I 8
J 9
dtype: int32
In [38]:
t["F"]
Out[38]:
5
In [39]:
t[["A","F","G"]]
Out[39]:
A 0
F 5
G 6
dtype: int32
In [40]:
# "g" is not a label of `t`. Passing a list that contains a missing label to
# [] or .loc raises KeyError in current pandas (older versions only emitted a
# FutureWarning), so use .reindex(), which yields NaN for absent labels.
t.reindex(["A", "F", "g"])
Out[40]:
A 0.0
F 5.0
g NaN
dtype: float64
In [42]:
b =t.index
b
Out[42]:
Index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'], dtype='object')
In [43]:
type(b)
Out[43]:
pandas.core.indexes.base.Index
In [46]:
c = t.values
c
Out[46]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [47]:
type(c)
Out[47]:
numpy.ndarray
DataFrame
通过剪贴板创建 DataFrame
In [48]:
import webbrowser
In [50]:
link= "https://www.baidu.com/"
webbrowser.open(link)
Out[50]:
True
In [51]:
# read_clipboard() parses whatever text is currently on the system clipboard,
# so the table must be copied (e.g. from a browser page) before running this.
# NOTE(review): the table shown in the output does not look like it came from
# the baidu.com link opened in the previous cell — confirm the source page.
df = pd.read_clipboard()
df
Out[51]:
Jan 2019 Jan 2018 Change Programming Language Ratings Change
0 1 1 NaN Java 16.904% +2.69%
1 2 2 NaN C 13.337% +2.30%
2 3 4 change Python 8.294% +3.62%
3 4 3 change C++ 8.158% +2.55%
4 5 7 change Visual Basic .NET 6.459% +3.20%
5 6 6 NaN JavaScript 3.302% -0.16%
6 7 5 change C# 3.284% -0.47%
7 8 9 change PHP 2.680% +0.15%
8 9 - change SQL 2.277% +2.28%
9 10 16 change Objective-C 1.781% -0.08%
In [52]:
t = pd.DataFrame(data=np.arange(12).reshape(3, 4))
t
Out[52]:
0 1 2 3
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
In [53]:
# A DataFrame has both a row index and a column index.
# Row index: labels the rows; attribute `index`; axis 0 (axis=0).
# Column index: labels the columns; attribute `columns`; axis 1 (axis=1).
In [54]:
pd.DataFrame(data=np.arange(12).reshape(3, 4),index=list(string.ascii_lowercase[:3]),columns=list(string.ascii_uppercase[-4:]))
Out[54]:
W X Y Z
a 0 1 2 3
b 4 5 6 7
c 8 9 10 11
In [56]:
d1 = {"name":["xiaoming","xiaohong"],"age":[18,20],"tel":[10086,10010]}
d1
Out[56]:
{'name': ['xiaoming', 'xiaohong'], 'age': [18, 20], 'tel': [10086, 10010]}
In [62]:
t2 = DataFrame(d1)
t2
Out[62]:
name age tel
0 xiaoming 18 10086
1 xiaohong 20 10010
In [60]:
type(t2)
Out[60]:
pandas.core.frame.DataFrame
In [61]:
t2.index
Out[61]:
RangeIndex(start=0, stop=2, step=1)
In [63]:
t2.columns
Out[63]:
Index(['name', 'age', 'tel'], dtype='object')
In [64]:
t2.values
Out[64]:
array([['xiaoming', 18, 10086],
['xiaohong', 20, 10010]], dtype=object)
In [65]:
type(t2.values)
Out[65]:
numpy.ndarray
In [66]:
t2.shape
Out[66]:
(2, 3)
In [67]:
t2.dtypes
Out[67]:
name object
age int64
tel int64
dtype: object
In [68]:
t2.ndim
Out[68]:
2
In [ ]: