一、创建DataFrame
1.由列表组成的字典创建
import pandas as pd
import numpy as np
"""
由列表组成的字典创建DataFrame
"""
# Build a DataFrame from a dict of equal-length lists: every key becomes a
# column label and its list supplies that column's values.
data = dict(
    name=['Jack', 'Mary', 'Tom'],  # column label: 'name'
    age=[14, 15, 17],
    gender=['M', 'W', 'M'],
)
fr = pd.DataFrame(data=data)
print(fr)
2.Series创建
"""
Series创建DataFrame
"""
# Build a DataFrame from a dict of Series. Each key is a column label and
# each Series carries its own row labels; the frame's index is the union of
# those labels, and a column with no value for a label holds NaN there.
first_column = pd.Series(np.random.rand(2), index=list('ab'))
second_column = pd.Series(np.random.rand(3), index=list('abc'))
data = {'one': first_column, 'two': second_column}
fr1 = pd.DataFrame(data=data)
print(fr1)
3.二维数组创建
"""
二维数组创建DataFrame
"""
# Build a DataFrame from a 3x3 array of random floats, supplying explicit
# row labels (index) and column labels (columns).
flat_values = np.random.rand(9)
ar = flat_values.reshape((3, 3))
fr3 = pd.DataFrame(data=ar, index=list('abc'), columns=list('shj'))
print(fr3)
4. 字典组成的列表创建
"""
字典组成的列表创建DataFrame:
每个字典是一行,一个key-value对是一列
"""
# Build a DataFrame from a list of dicts: each dict is one row and each key
# is a column. The two rows below share no keys, so every row holds NaN in
# the columns that only the other row defines.
first_row = dict(one=1, two=2, three=3)
second_row = dict(four=4, five=5, six=6)
data = [first_row, second_row]
fr5 = pd.DataFrame(data=data)
print(fr5)
5.字典组成的字典创建
import pandas as pd
import numpy as np
"""
字典组成的字典:
外字典是列
"""
# Build a DataFrame from a dict of dicts: the outer keys become the columns
# and the inner keys become the row labels. 'Lucy' has no 'china' entry, so
# that cell is NaN.
data = dict(
    Tom=dict(art=67, english=98, china=76),
    Mary=dict(art=45, english=78, china=70),
    Lucy=dict(art=58, english=79),
)
fr6 = pd.DataFrame(data=data)
print(fr6)
二、DataFrame查看
1.head(),tail()函数:
import pandas as pd
import numpy as np
"""
1.head(),tail()函数:
可以使用head方法查看DataFrame对象的前5行,用tail方法查看后5行,
或者head(3),tail(3)指定查看行数.
"""
# Sample data: six (state, year, pop) records, three per city.
data1 = dict(
    state=['beijing'] * 3 + ['shanghai'] * 3,
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)
df = pd.DataFrame(data=data1)
# Print the whole frame, then the leading rows (default 5, then 2) and the
# trailing rows (default 5, then 3).
for preview in (df, df.head(), df.head(2), df.tail(), df.tail(3)):
    print(preview)
2.列排序 (通过重新创建时,调整列下标columns顺序)
"""
2.列排序
"""
# Rebuilding the frame with an explicit columns= list fixes the column order.
column_order = ['year', 'state', 'pop']
print(pd.DataFrame(data=data1, columns=column_order))
3.行、列索引
"""
3.定行、列索引
"""
# Column selection: a single label returns a Series, a list of labels
# returns a DataFrame.
data2 = df['pop']  # select one column
data3 = df[['state', 'year']]  # select several columns
print(data2)
print(data3)
"""
Row selection with .loc is label-based: every key must be a label that
exists in the index, and a label slice includes both of its end points.
(Purely positional access is what .iloc is for.)
"""
# df was built without an explicit index, so its row labels are integers.
data4 = df.loc[0]  # select one row
data6 = df.loc[[2, 0]]  # select several rows, in the requested order
data9 = df.loc[1:2]  # label slice: rows 1 and 2, both inclusive

# String labels raise KeyError on df itself because its index does not
# contain them, so the string-label examples run on a relabelled copy.
# rename() returns a new frame and leaves df untouched.
row_labels = ['one', 'two', 'three', 'four', 'five', 'six']
labelled = df.rename(index=dict(enumerate(row_labels)))
data5 = labelled.loc['one']
data7 = labelled.loc[['three', 'one']]
data8 = labelled.loc['one':'two']
三、DataFrame修改
1.追加列
# Assigning a scalar fills every row of the 'debt' column with that value.
df['debt'] = 12
print(df)
# Assigning an array replaces the column element by element (values 1..6).
debt_numbers = np.arange(1, 7)
df['debt'] = debt_numbers
print(df)
# Assigning a Series aligns on the index: rows whose label is missing from
# the Series are filled with NaN.
val = pd.Series(data=[1, 2, 3], index=[2, 3, 4])
df['debt'] = val
print(df)
2.追加行
# Rebuild the frame with the row labels 'a'..'f' so a row can be picked by name.
df = pd.DataFrame(data1, index=list('abcdef'))
df1 = df.loc['a']  # row 'a' as a Series
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# The Series is first turned into a one-row frame (to_frame().T), and
# infer_objects() restores the column dtypes that the transpose widened to
# object. df itself is not modified; the combined frame is only printed.
print(pd.concat([df, df1.to_frame().T.infer_objects()]))  # append one row
3.删除列
# Add a boolean column that is True on the 'beijing' rows, show the frame,
# then delete the column again and show the result.
is_beijing = df['state'] == 'beijing'
df['new'] = is_beijing
print(df)
del df['new']
print(df)
4.转置
# Swap the rows and columns of df and print the result.
print(df.transpose())
5.列名、行(索引)名
# Give the column axis and the row axis a name each, then print the frame.
df.columns.name = 'key'
df.index.name = 'order'
print(df)
# .values returns the frame's data as a NumPy array, without the labels.
print(df.values)
6.DataFrame整体情况
# info is a method: it must be called to write the summary of the frame
# (index, columns, dtypes, non-null counts, memory usage) to stdout.
# It returns None, so the call is not wrapped in print().
df.info()