- From dict of Series or dicts
-
d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} d Out[41]: {'one': a 1.0 b 2.0 c 3.0 dtype: float64, 'two': a 1.0 b 2.0 c 3.0 d 4.0 dtype: float64} df=pd.DataFrame(d) df Out[43]: one two a 1.0 1.0 b 2.0 2.0 c 3.0 3.0 d NaN 4.0
pd.DataFrame(d,index=['d','a','b'],columns=['two','three']) Out[45]: two three d 4.0 NaN a 1.0 NaN b 2.0 NaN df.index Out[46]: Index(['a', 'b', 'c', 'd'], dtype='object') df.columns Out[47]: Index(['one', 'two'], dtype='object')
From structured or record array
This case is handled identically to a dict of arrays.
-
In [47]: data = np.zeros((2, ), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')]) In [48]: data[:] = [(1, 2., 'Hello'), (2, 3., "World")] In [49]: pd.DataFrame(data) Out[49]: A B C 0 1 2.0 b'Hello' 1 2 3.0 b'World' In [50]: pd.DataFrame(data, index=['first', 'second']) Out[50]: A B C first 1 2.0 b'Hello' second 2 3.0 b'World' In [51]: pd.DataFrame(data, columns=['C', 'A', 'B']) Out[51]: C A B 0 b'Hello' 1 2.0 1 b'World' 2 3.0
From a list of dicts
-
In [52]: data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}] In [53]: pd.DataFrame(data2) Out[53]: a b c 0 1 2 NaN 1 5 10 20.0 In [54]: pd.DataFrame(data2, index=['first', 'second']) Out[54]: a b c first 1 2 NaN second 5 10 20.0 In [55]: pd.DataFrame(data2, columns=['a', 'b']) Out[55]: a b 0 1 2 1 5 10
DataFrame赋值
# df
Out[43]:
one two
a 1.0 1.0
b 2.0 2.0
c 3.0 3.0
d NaN 4.0
df_new = DataFrame(df,columns=['one','two','three'])
Out[54]:
one two three
a 1.0 1.0 NaN
b 2.0 2.0 NaN
c 3.0 3.0 NaN
d NaN 4.0 NaN
df_new['three'] = range(0,4)
one two three
a 1.0 1.0 0
b 2.0 2.0 1
c 3.0 3.0 2
d NaN 4.0 3
df_new['three'] = np.arange(0,4)
df_new
Out[63]:
one two three
a 1.0 1.0 0
b 2.0 2.0 1
c 3.0 3.0 2
d NaN 4.0 3
df_new['four'] = np.arange(0,4)
one two three four
a 1.0 1.0 0 0
b 2.0 2.0 1 1
c 3.0 3.0 2 2
d NaN 4.0 3 3
Series 和DataFrame
1.Series定义:
Series像是一个Python的dict类型,因为它的索引与元素是映射关系
Series也像是一个ndarray类型,因为它也可以通过series_name[index]方式访问
Series是一维的,但能够存储不同类型的数据
每个Series都有一组索引与数据对应,若不指定则默认为整型索引
2.Series创建
显式指定Index:pd.Series([10,2,4,5],index=['A','B','C','D'])
不显式指定Index:pd.Series([10,2,4,5])
通过Dict生成Series:d={'b':1,'c':0,'e':5} pd.Series(d)
通过ndarray创建:pd.Series(np.random.randn(5),index=['A','B','C','D','E'])
DataFrame
可以理解为一张二维表:列标签称为columns,行标签和Series一样称为index;每一列都是一个Series,因此DataFrame可以视为共享同一index的多个Series的集合
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
data = {'Country':['Belgium','India','Brazil'],
'Capital':['Brussels','New Delhi','Brasilia'],
'Population':[11190846,1303171035,207847528]}
#Series
s1=pd.Series(data['Country'],index=['A','B','C'])
s1:
A Belgium
B India
C Brazil
dtype: object
s1.values #array(['Belgium', 'India', 'Brazil'], dtype=object)
s1.index #Index(['A', 'B', 'C'], dtype='object')
#DataFrame
df = pd.DataFrame(data)
Country Capital Population
0 Belgium Brussels 11190846
1 India New Delhi 1303171035
2 Brazil Brasilia 207847528
cou=df['Country']#访问一列
type(cou) #pandas.core.series.Series
df.iterrows()#遍历DataFrame行作为(索引、系列)对。
#<generator object DataFrame.iterrows at 0x11a5cc550>
for row in df.iterrows():
    print(row) #每一行是python的TUPLE
    print(type(row))
    print(len(row))
(0, Country Belgium
Capital Brussels
Population 11190846
Name: 0, dtype: object)
<class 'tuple'>
2
(1, Country India
Capital New Delhi
Population 1303171035
Name: 1, dtype: object)
<class 'tuple'>
2
(2, Country Brazil
Capital Brasilia
Population 207847528
Name: 2, dtype: object)
<class 'tuple'>
2
#一个DataFrame是多个Series组成的
s1=pd.Series(data['Capital'])
s2=pd.Series(data['Country'])
s3=pd.Series(data['Population'])
df_new=pd.DataFrame([s2,s1,s3],index=['Country','Capital','Population'])
df_new = df_new.T # Transpose: swap rows (index) and columns, so each Series becomes a column.
Country Capital Population
0 Belgium Brussels 11190846
1 India New Delhi 1303171035
2 Brazil Brasilia 207847528