文章目录
GitHub: https://github.com/RealEmperor/Python-for-Data-Analysis
pandas
Series
import numpy as np
import pandas as pd
import sys
from pandas import Series, DataFrame
# Build a Series from a plain list; no index is given, so a default
# integer index is used.
obj = pd.Series(data=[4, 7, -5, 3])
# Show the Series itself, then its values array, then its index,
# one per line.
print(obj, obj.values, obj.index, sep='\n')
0 4
1 7
2 -5
3 3
dtype: int64
[ 4 7 -5 3]
RangeIndex(start=0, stop=4, step=1)
# Build a Series with explicit string labels instead of the default
# integer index.
obj2 = pd.Series(data=[4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
# Show the Series, its index, and the single value stored under label 'a',
# one per line.
print(obj2, obj2.index, obj2['a'], sep='\n')
d 4
b 7
a -5
c 3
dtype: int64
Index(['d', 'b', 'a', 'c'], dtype='object')
-5
# Overwrite the value stored under label 'd'.
obj2['d'] = 6
# Passing a list of labels selects several entries at once; the result
# follows the order of the list, not the order of the Series.
obj2[['c', 'a', 'd']]
c 3
a -5
d 6
dtype: int64
# Boolean filtering: keep only the entries whose value is greater than zero.
# The surviving entries keep their original labels.
obj2[obj2 > 0]
d 6
b 7
c 3
dtype: int64
# Multiplying by a scalar is applied element-wise; the index labels are kept.
obj2 * 2
d 12
b 14
a -10
c 6
dtype: int64
# A NumPy function applied to a Series returns a Series with the same
# index labels (the result here is float64).
np.exp(obj2)
d 403.428793
b 1096.633158
a 0.006738
c 20.085537
dtype: float64
# `in` tests membership against the index labels, not the stored values.
'b' in obj2
True
# 'e' is not one of obj2's index labels, so this evaluates to False.
'e' in obj2
False
# Mapping of state name -> value; used as the source data for a Series.
sdata = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000,
}
# A dict can be passed straight to Series: its keys become the index labels.
obj3 = pd.Series(sdata)
print(obj3)
Ohio 35000
Oregon 16000
Texas 71000
Utah 5000
dtype: int64
# Labels to use as the index. 'California' is not a key of sdata, and
# sdata's 'Utah' key is not listed here.
states = ['California', 'Ohio', 'Oregon', 'Texas']
# Build the Series from the dict using the given index: a label with no
# matching key gets NaN, and keys not listed in the index are left out.
obj4 = Series(sdata, index=states)
print(obj4)
California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
dtype: float64
# Element-wise missing-value check: True where the entry is NaN.
pd.isnull(obj4)
California True
Ohio False
Oregon False
Texas False
dtype: bool
# Element-wise presence check: True where the entry holds a value (not NaN).
pd.notnull(obj4)
California False
Ohio True
Oregon True
Texas True
dtype: bool
# Method form of the missing-value check; gives the same result as
# calling pd.isnull(obj4).
obj4.isnull()
California True
Ohio False
Oregon False
Texas False
dtype: bool
# Print both operands, then their sum.
print(obj3)
print(obj4)
# Addition aligns on index labels. A label that is missing from one
# operand, or whose value is NaN in one operand, yields NaN in the result.
print(obj3 + obj4)
Ohio 35000
Oregon 16000
Texas 71000
Utah 5000
dtype: int64
California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
dtype: float64
California NaN
Ohio 70000.0
Oregon 32000.0
Texas 142000.0
Utah NaN
dtype: float64
# Both the Series and its index have a `name` attribute; each one shows up
# in the printed representation.
obj4.name = 'population'
obj4.index.name = 'state'
print(obj4)
state
California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
Name: population, dtype: float64
# The index can be replaced in place by assignment; here one new label is
# supplied for each existing entry.
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
print(obj)
Bob 4
Steve 7
Jeff -5
Ryan 3
dtype: int64
DataFrame
# Column-oriented source data: each key is a column name and each list holds
# that column's values, one per row (all lists have the same length).
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
# Build a DataFrame from the dict of equal-length lists and display it.
frame = pd.DataFrame(data)
print(frame)
pop state year
0 1.5 Ohio 2000
1 1.7 Ohio 2001
2 3.6 Ohio 2002
3 2.4 Nevada 2001
4 2.9 Nevada 2002
# Passing `columns` fixes the order in which the columns are arranged.
DataFrame(data, columns=['year', 'state', 'pop'])
| | year | state | pop |
|---|---|---|---|
| 0 | 2000 | Ohio | 1.5 |
| 1 | 2001 | Ohio | 1.7 |
| 2 | 2002 | Ohio | 3.6 |
| 3 | 2001 | Nevada | 2.4 |
| 4 | 2002 | Nevada | 2.9 |
# 'debt' is not a key of data, so that column is created and filled with NaN.
# `index` supplies the row labels in place of the default integers.
frame2 = DataFrame(data, columns=['year', 'state', 'pop', 'debt'],
index=['one', 'two', 'three', 'four', 'five'])
print(frame2)
print(frame2.columns)
year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 NaN
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 NaN
five 2002 Nevada 2.9 NaN
Index(['year', 'state', 'pop', 'debt'], dtype='object')
# A column can be retrieved as a Series with dict-style indexing ...
print(frame2['state'])
# ... or with attribute access on the column name.
print(frame2.year)
# .loc selects a row by its index label; the row comes back as a Series
# whose name is that label and whose index is the column names.
print(frame2.loc['three'])
one Ohio
two Ohio
three Ohio
four Nevada
five Nevada
Name: state, dtype: object
one 2000
two 2001
three 2002
four 2001
five 2002
Name: year, dtype: int64
year 2002
state Ohio
pop 3.6
debt NaN
Name: three, dtype: object
# Assigning a scalar to a column sets that value in every row.
frame2['debt'] = 16.5
print(frame2)
year state pop debt
one 2000 Ohio 1.5 16.5
two 2001 Ohio 1.7 16.5
three 2002 Ohio 3.6 16.5
four 2001 Nevada 2.4 16.5
five 2002 Nevada 2.9 16.5
# Assign an array of five floats (0.0 through 4.0) to the column, one value
# per row of frame2.
frame2['debt'] = np.arange(5.)