Pandas 广泛用于数据分析,提供两种基本数据结构:Series 和 DataFrame。Pandas 的操作以列为核心,部分原因在于机器学习中的特征通常按列保存。
#Pandas实例记录
#基本数据结构:Series和DataFrame
import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas import Series
Series部分
# Series section
# List -> Series: a default integer index (0..n-1) is assigned.
obj = Series([4, 7, -5, 3])
print(obj)
print(obj.values)
print(obj.index)

# Supply the index labels explicitly.
obj2 = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
print(obj2)
print(obj2['a'])
print(obj2[obj2 > 0])
# Output of the boolean filter above:
# d    4
# b    7
# c    3
# dtype: int64

# Broadcasting: arithmetic and NumPy ufuncs apply element-wise and keep
# the index labels.
print(obj2 * 2)
print(np.exp(obj2))  # np is NumPy, imported at the top of the file

# Dict -> Series: the dict keys become the index.
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = Series(sdata)
print(obj3)
# Output (current pandas keeps the dict's insertion order; very old
# releases sorted the keys instead):
# Ohio      35000
# Texas     71000
# Oregon    16000
# Utah       5000
# dtype: int64
print(obj3.index)

states = ['California', 'Ohio', 'Oregon', 'Texas']
# The requested labels are matched against the dict keys; labels with no
# matching key ('California') become NaN, and unmatched keys ('Utah') are
# dropped. The NaN forces the dtype to float64.
obj4 = Series(sdata, index=states)
print(obj4)
# Output:
# California        NaN
# Ohio          35000.0
# Oregon        16000.0
# Texas         71000.0
# dtype: float64

# Arithmetic aligns on index labels; a label present on only one side
# yields NaN.
print(obj3 + obj4)
# Output:
# California         NaN
# Ohio           70000.0
# Oregon         32000.0
# Texas         142000.0
# Utah               NaN
# dtype: float64

# Both the Series and its index can carry a name.
obj4.name = '人口'
obj4.index.name = '州'
print(obj4)
# Output:
# 州
# California        NaN
# Ohio          35000.0
# Oregon        16000.0
# Texas         71000.0
# Name: 人口, dtype: float64

obj = Series([4, 7, -5, 3])
# Replace the index in place by assignment.
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
# A bare `obj` expression only displays in a notebook/REPL; print it so
# the script shows the result too.
print(obj)
DataFrame部分
DataFrame 的链式索引顺序是 [列][行],与二维数组的 [行][列] 相反
# DataFrame section
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
# Each dict key becomes a column name.
frame = DataFrame(data)
print(frame)
# Output (current pandas keeps the dict's key order; very old releases
# sorted the columns alphabetically as pop/state/year):
#     state  year  pop
# 0    Ohio  2000  1.5
# 1    Ohio  2001  1.7
# 2    Ohio  2002  3.6
# 3  Nevada  2001  2.4
# 4  Nevada  2002  2.9

# Printed before the index is reassigned, so this shows the default
# RangeIndex(start=0, stop=5, step=1).
print(frame.index)

# Replace the row labels in place.
frame.index = ['X', 'Y', 'Z', 'a', 'b']
print(frame)
# Output:
#     state  year  pop
# X    Ohio  2000  1.5
# Y    Ohio  2001  1.7
# Z    Ohio  2002  3.6
# a  Nevada  2001  2.4
# b  Nevada  2002  2.9

# Chained access on a DataFrame is [column][row], the opposite of a 2-D
# array's [row][column]. frame.loc['Z', 'state'] reads the same cell in a
# single lookup.
print(frame['state']['Z'])
# Output:
# Ohio

# Choose the column order explicitly; the index is the default again.
frame = DataFrame(data, columns=['year', 'state', 'pop'])
print(frame)
# Output:
#    year   state  pop
# 0  2000    Ohio  1.5
# 1  2001    Ohio  1.7
# 2  2002    Ohio  3.6
# 3  2001  Nevada  2.4
# 4  2002  Nevada  2.9

# Selecting data from a DataFrame (illustrative only; `df` here stands for
# some other frame with a date index and open/close columns, not `frame`):
#   df.iloc[2, 3]    # positional access, positions start at 0
#   df.loc[['2018-11-08', '2018-11-06'], ['open', 'close']]    # by label
2150

被折叠的 条评论
为什么被折叠?



