import pandas as pd
import numpy as np
# Series
# A Series is a one-dimensional labelled array; its values share one dtype.
# 1) Construction: pd.Series([value1, value2, ...], index=[label1, label2, ...])
#    When index is omitted the labels default to 0, 1, 2, ...
s = pd.Series([15, -2, 7, 9], index=['a', 'b', 'c', 'd'])
print("创建的Series为:\n", s)
print(s.values) # [15 -2 7 9]
print(s.index) # Index(['a', 'b', 'c', 'd'], dtype='object')
# 2) Indexing
# The index holds string labels, so this integer slice selects by position
# (the first two elements).
print("s[0:2]索引的元素为:\n", s[0:2])
# A list of labels selects exactly those entries, by label.
print("s[['b', 'c', 'd']]索引的元素为:\n", s[['b', 'c', 'd']])
# 3) Updating values
# Positional assignment through s[1] = 0 is deprecated; .iloc makes the
# positional intent explicit, while s['d'] assigns by label.
s.iloc[1] = 0
s['d'] = -5
print(s)
# 4) Appending elements
# Start again from the original values so the result is independent of step 3.
s = pd.Series([15, -2, 7, 9], index=['a', 'b', 'c', 'd'])
n = pd.Series([2], index=['e'])
# pd.concat is the public way to join Series; Series._append is a private
# helper and should not be relied on.
print("追加后的Series为:\n", pd.concat([s, n]))
# 5) Removing elements (each expression yields a new Series; s is unchanged)
# Drop by label.
print("drop删除后的Series为:\n", s.drop('a'))
# Drop by value: keep every element that is not equal to 7.
print("根据值删除后的Series为:\n", s[s != 7])
# Drop by position: look the label up through s.index first.
print("根据索引删除后的Series为:\n", s.drop(s.index[3]))
# 6) Reindexing
s = pd.Series([1.5, -2.0, 7.3, 9.4], index=['d', 'b', 'a', 'c'])
print("创建的Series为:\n", s)
# reindex reorders the values to follow the new labels; 'e' has no value in
# s, so it becomes NaN.
s1 = s.reindex(['a', 'b', 'c', 'd', 'e'])
print("重新索引后的Series为:\n", s1)
# fill_value replaces the NaN that a missing label would otherwise get.
print("填充后的Series为:\n", s.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0))
# DataFrame
# A DataFrame is a labelled table of columns; it can be built from a dict
# that maps each column name to its list of values.
data = dict(
    color=['blue', 'green', 'yellow', 'red', 'white'],
    object=['ball', 'pen', 'pencil', 'paper', 'mug'],
    price=[1.2, 1.0, 0.6, 0.9, 1.7],
)
df = pd.DataFrame(data=data)
print("创建的DataFrame为:\n", df)

# columns=[...] picks which columns to keep and the order they appear in.
df1 = pd.DataFrame(data=data, columns=['price', 'color'])
print("创建的DataFrame1为:\n", df1)

# index=[...] supplies row labels in place of the default 0, 1, 2, ...
df2 = pd.DataFrame(data=data, index='one two three four five'.split())
print("创建的DataFrame2为:\n", df2)

# Fully custom construction: a 4x4 value matrix, then row labels (index),
# then column labels (columns).
df3 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index='red blue yellow white'.split(),
    columns='ball pen pencil paper'.split(),
)
print("创建的DataFrame3为:\n", df3)
结果:
创建的Series为:
a 15
b -2
c 7
d 9
dtype: int64
[15 -2 7 9]
Index(['a', 'b', 'c', 'd'], dtype='object')
s[0:2]索引的元素为:
a 15
b -2
dtype: int64
s[['b', 'c', 'd']]索引的元素为:
b -2
c 7
d 9
dtype: int64
a 15
b 0
c 7
d -5
dtype: int64
追加后的Series为:
a 15
b -2
c 7
d 9
e 2
dtype: int64
drop删除后的Series为:
b -2
c 7
d 9
dtype: int64
根据值删除后的Series为:
a 15
b -2
d 9
dtype: int64
根据索引删除后的Series为:
a 15
b -2
c 7
dtype: int64
创建的Series为:
d 1.5
b -2.0
a 7.3
c 9.4
dtype: float64
重新索引后的Series为:
a 7.3
b -2.0
c 9.4
d 1.5
e NaN
dtype: float64
填充后的Series为:
a 7.3
b -2.0
c 9.4
d 1.5
e 0.0
dtype: float64
创建的DataFrame为:
color object price
0 blue ball 1.2
1 green pen 1.0
2 yellow pencil 0.6
3 red paper 0.9
4 white mug 1.7
创建的DataFrame1为:
price color
0 1.2 blue
1 1.0 green
2 0.6 yellow
3 0.9 red
4 1.7 white
创建的DataFrame2为:
color object price
one blue ball 1.2
two green pen 1.0
three yellow pencil 0.6
four red paper 0.9
five white mug 1.7
创建的DataFrame3为:
ball pen pencil paper
red 0 1 2 3
blue 4 5 6 7
yellow 8 9 10 11
white 12 13 14 15