一 . DataFrame转成python中的数据格式
1 . 转成json
DataFrame转成json,可以使用df.to_json()方法
import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# With no arguments the result is keyed by column name, then by index label.
print(df.to_json())
# {"name":{"0":"mashiro","1":"satori","2":"koishi","3":"nagisa"},"age":{"0":17,"1":17,"2":16,"3":21}}
我们看到虽然转化成了json,但是有些不完美,那就是它把索引也算进去了
import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# To leave the index out of the output, pass index=False.
try:
    print(df.to_json(index=False))
except ValueError as e:
    # pandas rejects index=False here because the default orient does not
    # support dropping the index.
    print(e)
    # 'index=False' is only valid when 'orient' is 'split' or 'table'
我们看一下这个orient是什么
首先orient可以有如下取值:split、records、index、columns、values、table
我们分别演示一下,看看orient取不同的值,结果会有什么变化
orient='split'
import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="split" separates column names, index labels and row data.
print(df.to_json(orient="split"))
"""{
"columns":["name","age"],
"index":[0,1,2,3],
"data":[["mashiro",17],["satori",17],["koishi",16],["nagisa",21]]
}"""

# With this orient, index=False is accepted and drops the "index" entry.
print(df.to_json(orient="split", index=False))
"""{
"columns":["name","age"],
"data":[["mashiro",17],["satori",17],["koishi",16],["nagisa",21]]
}"""
我们看到会变成三个键值对,分别是列名、索引、数据
orient='records'
import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="records" yields a list with one {column: value} object per row.
print(df.to_json(orient="records"))
"""[{"name":"mashiro","age":17},
{"name":"satori","age":17},
{"name":"koishi","age":16},
{"name":"nagisa","age":21}]"""
这种格式的数据是比较常用的,相当于列名和每一行数据组合成一个字典,然后存在一个列表里面。并且我们看到生成的json默认跟索引没啥关系,所以不需要加index=False(在较早版本的pandas中,这里指定index=False还会报错)
orient='index'
import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="index" maps each index label to that row's {column: value} object.
print(df.to_json(orient="index"))
"""{
"0":{"name":"mashiro","age":17},
"1":{"name":"satori","age":17},
"2":{"name":"koishi","age":16},
"3":{"name":"nagisa","age":21}
}"""
类似于records,只不过这里把字典作为value放在了外层字典里,其中key为对应的索引。当然这里同样不可以加index=False
orient='columns'
import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="columns" maps each column name to an {index label: value} object.
print(df.to_json(orient="columns"))
"""{"name":{"0":"mashiro","1":"satori","2":"koishi","3":"nagisa"},"age":{"0":17,"1":17,"2":16,"3":21}}"""
我们看到这个和不指定orient得到结果是一样的,其实不指定的话orient默认是columns
orient='values'
import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="values" keeps only the row data, as a list of lists.
print(df.to_json(orient="values"))
"""[["mashiro",17],["satori",17],["koishi",16],["nagisa",21]]"""

# This is similar to to_numpy(), which also returns just the values.
print(df.to_numpy())
"""[['mashiro' 17]
 ['satori' 17]
 ['koishi' 16]
 ['nagisa' 21]]"""

# orient='table'
# Returns the data in the form of a database-style two-dimensional table:
# a "schema" section describing the fields plus a "data" section of rows.
print(df.to_json(orient="table"))
"""{
"schema": {
"fields": [{"name": "index", "type": "integer"},
{"name": "name", "type": "string"},
{"name": "age", "type": "integer"}],
"primaryKey": ["index"],
"pandas_version": "0.20.0"
},
"data": [{"index": 0, "name": "mashiro", "age": 17},
{"index": 1, "name": "satori", "age": 17},
{"index": 2, "name": "koishi", "age": 16},
{"index": 3, "name": "nagisa", "age": 21}]
}"""

# index=False is accepted with orient="table" and drops the index field.
print(df.to_json(orient="table", index=False))
"""{
"schema": {
"fields": [{"name": "name", "type": "string"},
{"name": "age", "type": "integer"}],
"pandas_version": "0.20.0"
},
"data": [{"name": "mashiro", "age": 17},
{"name": "satori", "age": 17},
{"name": "koishi", "age": 16},
{"name": "nagisa", "age": 21}]
}"""
2 . 转成dict
DataFrame也可以转成字典,转换成字典里面也有一个orient参数,里面有一部分和to_json是类似的。这是因为json的对象结构(源自JavaScript的对象字面量语法)和python中的字典非常相似,二者可以很自然地相互对应。
to_dict中的orient可以有如下取值:dict、list、series、split、records、index,默认是dict
orient='dict'
from pprint import pprint

import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="dict" maps each column name to an {index label: value} dict.
pprint(df.to_dict(orient="dict"))
"""{'age': {0: 17, 1: 17, 2: 16, 3: 21},
 'name': {0: 'mashiro', 1: 'satori', 2: 'koishi', 3: 'nagisa'}}"""
orient='list'
from pprint import pprint

import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="list" maps each column name to a plain list of its values.
pprint(df.to_dict(orient="list"))
"""{'age': [17, 17, 16, 21], 'name': ['mashiro', 'satori', 'koishi', 'nagisa']}"""
orient='series'
from pprint import pprint

import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# Rarely used in practice: each column name simply maps to a Series.
pprint(df.to_dict(orient="series"))
"""{'age':
0    17
1    17
2    16
3    21
Name: age, dtype: int64,
'name': 0    mashiro
1     satori
2     koishi
3     nagisa
Name: name, dtype: object}"""
orient='split'
from pprint import pprint

import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="split" separates column names, row data and index labels.
pprint(df.to_dict(orient="split"))
"""{'columns': ['name', 'age'],
 'data': [['mashiro', 17], ['satori', 17], ['koishi', 16], ['nagisa', 21]],
 'index': [0, 1, 2, 3]}"""
orient='records'
from pprint import pprint

import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="records" yields a list with one {column: value} dict per row.
pprint(df.to_dict(orient="records"))
"""[{'age': 17, 'name': 'mashiro'},
 {'age': 17, 'name': 'satori'},
 {'age': 16, 'name': 'koishi'},
 {'age': 21, 'name': 'nagisa'}]"""
orient='index'
from pprint import pprint

import pandas as pd

df = pd.DataFrame(
    {
        "name": ["mashiro", "satori", "koishi", "nagisa"],
        "age": [17, 17, 16, 21],
    }
)

# orient="index" maps each index label to that row's {column: value} dict.
pprint(df.to_dict(orient="index"))
"""{0: {'age': 17, 'name': 'mashiro'},
 1: {'age': 17, 'name': 'satori'},
 2: {'age': 16, 'name': 'koishi'},
 3: {'age': 21, 'name': 'nagisa'}}"""
二 . python中的数据格式转成DataFrame
1 . 字典转成DataFrame
import pandas as pd

data = {
    0: {"age": 17, "name": "mashiro"},
    1: {"age": 17, "name": "satori"},
    2: {"age": 16, "name": "koishi"},
    3: {"age": 21, "name": "nagisa"},
}

# Clearly not the layout we want: the outer keys end up as the columns.
df = pd.DataFrame.from_dict(data)
print(df)
"""            0       1       2       3
age        17      17      16      21
name  mashiro  satori  koishi  nagisa"""

# orient="index" treats the outer keys as row labels instead.
df = pd.DataFrame.from_dict(data, orient="index")
print(df)
"""   age     name
0   17  mashiro
1   17   satori
2   16   koishi
3   21   nagisa"""
所以df.to_dict和pd.DataFrame.from_dict实现的是相反的功能,但是from_dict中的orient参数只有两种选择,要么是index,要么是columns,默认是columns
from_records
from_records是专门针对外层是列表的数据
import pandas as pd

data = [
    {"age": 17, "name": "mashiro"},
    {"age": 17, "name": "satori"},
    {"age": 16, "name": "koishi"},
    {"age": 21, "name": "nagisa"},
]

# Each dict in the list becomes one row; the dict keys become the columns.
df = pd.DataFrame.from_records(data)
print(df)
"""   age     name
0   17  mashiro
1   17   satori
2   16   koishi
3   21   nagisa"""
其实这种数据就是to_dict(orient="records")生成的