目录
基本操作
Series
相当于一个定长有序的字典
创建
import pandas as pd
# Build a Series from a plain list; with no index given, the labels are 0..3.
a = pd.Series(data=[1, 2, 3, 4])
print(a)

# Build a Series from the same list, this time with explicit string labels.
a1 = pd.Series(data=[1, 2, 3, 4], index=list("abcd"))
print(a1)

# Build a Series from a dict: the keys become the index labels.
b = dict(name="aaa", age=18, home="A")
c = pd.Series(data=b)
print(c)
'''
0 1
1 2
2 3
3 4
dtype: int64
a 1
b 2
c 3
d 4
dtype: int64
name aaa
age 18
home A
dtype: object
'''
相关属性
import pandas as pd
a = pd.Series(data=[1, 2, 3, 4])

# Show all stored values, then the index that labels them.
print(a.values)
print(a.index)

# `in` applied to the Series itself checks whether 4 is an index label.
b = 4 in a
print(b)

# `in` applied to `.values` checks whether 4 is one of the stored values.
c = 4 in a.values
print(c)

# Look up a single value by its index label.
print(a[3])
# Look up several values at once by passing a list of index labels.
print(a[[1, 3]])
'''
[1 2 3 4]
RangeIndex(start=0, stop=4, step=1)
False
True
4
1 2
3 4
dtype: int64
'''
DataFrame
表格化数据
创建
import pandas as pd
import numpy as np
# Create from a dict. When every value is a single scalar rather than a list,
# an index has to be supplied.
a = dict(name="aaa", age=18, city="A")
df1 = pd.DataFrame(data=a, index=[1])
print(df1)

# Create from a dict whose values are lists; no index is passed here.
b = {
    "name": ["aaa", "bbb", "ccc"],
    "age": [18, 19, 20],
    "city": ["A", "B", "C"],
}
df2 = pd.DataFrame(data=b)
print(df2)

# Create from a NumPy array: 0..23 laid out as 4 rows x 6 columns, with
# string row labels.
c = pd.DataFrame(data=np.arange(24).reshape((4, 6)), index=list("abcd"))
print(c)
'''
name age city
1 aaa 18 A
name age city
0 aaa 18 A
1 bbb 19 B
2 ccc 20 C
0 1 2 3 4 5
a 0 1 2 3 4 5
b 6 7 8 9 10 11
c 12 13 14 15 16 17
d 18 19 20 21 22 23
'''
基本属性
import pandas as pd
a = {
    "name": ["aaa", "bbb", "ccc"],
    "age": [18, 19, 20],
    "city": ["A", "B", "C"],
}
# The row labels are deliberately out of order so the sorting calls below
# have a visible effect.
df1 = pd.DataFrame(data=a, index=[2, 1, 3])
print(df1)

# Transpose: rows become columns and columns become rows.
print(df1.T)

# Summary statistics for the frame.
print(df1.describe())

# Row labels (the index), then the column labels.
print(df1.index)
print(df1.columns)

# Sort by row index; passing axis=1 instead would sort by column labels.
print(df1.sort_index(axis=0))

# Sort the rows by the values in the "age" column.
print(df1.sort_values(by="age"))
'''
name age city
2 aaa 18 A
1 bbb 19 B
3 ccc 20 C
2 1 3
name aaa bbb ccc
age 18 19 20
city A B C
age
count 3.0
mean 19.0
std 1.0
min 18.0
25% 18.5
50% 19.0
75% 19.5
max 20.0
Int64Index([2, 1, 3], dtype='int64')
Index(['name', 'age', 'city'], dtype='object')
name age city
1 bbb 19 B
2 aaa 18 A
3 ccc 20 C
name age city
2 aaa 18 A
1 bbb 19 B
3 ccc 20 C
'''
数据的选择
# 数据
import pandas as pd
import numpy as np
# Six consecutive daily timestamps starting at 2019-11-11, used as row labels.
dates = pd.date_range(start="20191111", periods=6)

# The integers 0..23 arranged as 6 rows x 4 columns, with columns named a-d.
df = pd.DataFrame(
    data=np.arange(24).reshape((6, 4)),
    index=dates,
    columns=list("abcd"),
)
print(df)
'''
a b c d
2019-11-11 0 1 2 3
2019-11-12 4 5 6 7
2019-11-13 8 9 10 11
2019-11-14 12 13 14 15
2019-11-15 16 17 18 19
2019-11-16 20 21 22 23
'''
一般列标签、索引取值
# Select a column by its label using attribute access.
print(df.a)
# Same as the above, using bracket indexing with the column label.
print(df["a"])
# Select rows by slicing the index with date strings.
print(df["2019-11-12":"2019-11-13"])
# Same rows as the above, selected with a positional slice.
print(df[1:3])
'''
2019-11-11 0
2019-11-12 4
2019-11-13 8
2019-11-14 12
2019-11-15 16
2019-11-16 20
Freq: D, Name: a, dtype: int32
a b c d
2019-11-12 4 5 6