#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""DataFrame data structure: creating, reading and modifying.

A flat walkthrough script. Each section prints a banner followed by the
DataFrame (or Series) that results from the operation being shown.
"""
import numpy as np
import pandas as pd

__author__ = '未昔/angelfate'
__date__ = '2019/8/6 10:52'

print('用字典生成DataFrame,key为列名(默认key是无序的)。')
data = {
    'name': ['wang', 'wei', 'RPA', 'python', 'linux', 'C'],
    'year': [2000, 2010, 2020, 2030, 2040, 2050],
    'pop': [1.0, 1.1, 1.2, 1.3, 1.4, 1.5],
}
print('\n')
print(pd.DataFrame(data))  # convert the dict to a DataFrame

print('----指定索引顺序----')
print(pd.DataFrame(data, columns=['name', 'year', 'pop']))  # explicit column order

print('----指定索引,不存在的列,默认使用数据NaN----')
# 'state' and 'con' are not keys of `data`, so those columns are filled with NaN.
data2 = pd.DataFrame(
    data,
    columns=['name', 'year', 'pop', 'state', 'con'],
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
print(data2)

print('----查询指定 列索引 数据---')
print(data2['name'])  # select a column by key
print(data2.year)  # select a column by attribute access

print('----查询指定 行索引 数据----')
# `.ix` was removed in pandas 1.0; `.loc` is the label-based row selector.
print(data2.loc['two'])

print('----修改列值----')
data2['state'] = 16.5  # broadcast one scalar over the whole column
print(data2)
# Bracket assignment is used instead of `data2.con = ...`: attribute
# assignment only updates a column when that column already exists.
data2['con'] = np.arange(6)  # fill the column from a numpy array
print(data2)
print('\n')

print('用Serice指定要修改的索引及对应值,没有指定的数据用Nan。')
# Assignment aligns on index labels; rows missing from `val` become NaN.
val = pd.Series([-1.0, -1.1, -1.2], index=['two', 'three', 'five'])
data2['con'] = val
print(data2)
print('\n')

print('---赋值给新列---')
# Assigning to a key that does not exist yet creates a new column.
data2['new_data'] = (data2['name'] == 'wang')
print(data2)
print(data2.columns)  # print the column labels
print('\n')

print('---DataFrame转置---')
# Nested dict: outer keys become columns, inner keys become the row index.
values = {
    'Nevada': {2001: 1.1, 2002: 2.2},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
data3 = pd.DataFrame(values)
print(data3)
print(data3.T)  # swap rows and columns
print('\n')

print('---指定索引顺序,使用切片初始化顺序---')
print(pd.DataFrame(values, index=[2001, 2002, 2003]))
# Build a frame from slices of existing columns.
data4 = {'Ohio': data3['Ohio'][:-1], 'Nevada': data3['Nevada'][:2]}
print(pd.DataFrame(data4))

print('---指定索引和列名---')
data3.index.name = 'year'
data3.columns.name = 'state'
print(data3)
print(data3.values)  # underlying data only, without labels
print(data2.values)
结果
用字典生成DataFrame,key为列名(默认key是无序的)。
     name  pop  year
0    wang  1.0  2000
1     wei  1.1  2010
2     RPA  1.2  2020
3  python  1.3  2030
4   linux  1.4  2040
5       C  1.5  2050
----指定索引顺序----
     name  year  pop
0    wang  2000  1.0
1     wei  2010  1.1
2     RPA  2020  1.2
3  python  2030  1.3
4   linux  2040  1.4
5       C  2050  1.5
----指定索引,不存在的列,默认使用数据NaN----
         name  year  pop state  con
one      wang  2000  1.0   NaN  NaN
two       wei  2010  1.1   NaN  NaN
three     RPA  2020  1.2   NaN  NaN
four   python  2030  1.3   NaN  NaN
five    linux  2040  1.4   NaN  NaN
six         C  2050  1.5   NaN  NaN
----查询指定 列索引 数据---
one wang
two wei
three RPA
four python
five linux
six C
Name: name, dtype:object
one 2000
two 2010
three 2020
four 2030
five 2040
six 2050
Name: year, dtype: int64
----查询指定 行索引 数据----
name wei
year 2010
pop 1.1
state NaN
con NaN
Name: two, dtype: object
----修改列值----
         name  year  pop  state  con
one      wang  2000  1.0   16.5  NaN
two       wei  2010  1.1   16.5  NaN
three     RPA  2020  1.2   16.5  NaN
four   python  2030  1.3   16.5  NaN
five    linux  2040  1.4   16.5  NaN
six         C  2050  1.5   16.5  NaN
         name  year  pop  state  con
one      wang  2000  1.0   16.5    0
two       wei  2010  1.1   16.5    1
three     RPA  2020  1.2   16.5    2
four   python  2030  1.3   16.5    3
five    linux  2040  1.4   16.5    4
six         C  2050  1.5   16.5    5
用Serice指定要修改的索引及对应值,没有指定的数据用Nan。
         name  year  pop  state  con
one      wang  2000  1.0   16.5  NaN
two       wei  2010  1.1   16.5 -1.0
three     RPA  2020  1.2   16.5 -1.1
four   python  2030  1.3   16.5  NaN
five    linux  2040  1.4   16.5 -1.2
six         C  2050  1.5   16.5  NaN
---赋值给新列---
         name  year  pop  state  con  new_data
one      wang  2000  1.0   16.5  NaN      True
two       wei  2010  1.1   16.5 -1.0     False
three     RPA  2020  1.2   16.5 -1.1     False
four   python  2030  1.3   16.5  NaN     False
five    linux  2040  1.4   16.5 -1.2     False
six         C  2050  1.5   16.5  NaN     False
Index(['name', 'year', 'pop', 'state', 'con', 'new_data'], dtype='object')
---DataFrame转置---
      Nevada  Ohio
2000     NaN   1.5
2001     1.1   1.7
2002     2.2   3.6
        2000  2001  2002
Nevada   NaN   1.1   2.2
Ohio     1.5   1.7   3.6
---指定索引顺序,使用切片初始化顺序---
      Nevada  Ohio
2001     1.1   1.7
2002     2.2   3.6
2003     NaN   NaN
      Nevada  Ohio
2000     NaN   1.5
2001     1.1   1.7
---指定索引和列名---
state  Nevada  Ohio
year
2000      NaN   1.5
2001      1.1   1.7
2002      2.2   3.6
[[nan 1.5]
 [1.1 1.7]
 [2.2 3.6]]
[['wang' 2000 1.0 16.5 nan True]
 ['wei' 2010 1.1 16.5 -1.0 False]
 ['RPA' 2020 1.2 16.5 -1.1 False]
 ['python' 2030 1.3 16.5 nan False]
 ['linux' 2040 1.4 16.5 -1.2 False]
 ['C' 2050 1.5 16.5 nan False]]
Process finished with exit code 0