DataFrame基本认识及其属性
import numpy
import pandas
# Load the .npz archive and read its 'columns' and 'values' entries.
# NOTE(review): allow_pickle=True lets numpy unpickle object arrays, which can
# execute arbitrary code -- only load archives that come from a trusted source.
# The `with` block closes the archive once both arrays have been read.
with numpy.load('国民经济核算季度数据.npz',allow_pickle=True) as res:
    columns = res['columns']
    values = res['values']
# Put the header row on top of the value rows so both print as one array.
data = numpy.concatenate(([columns],values),axis=0)
print(data)
# One 'index_<i>' label per row of `values`; deriving the count from the data
# (instead of the previous hard-coded 69) keeps this working for any row count.
index = ['index_'+str(i) for i in range(len(values))]
df = pandas.DataFrame(values,columns=columns,index=index)
print(df)
# Selecting a single column by its name.
se = df['时间']
print(se)
print('-'*90)
# Build a small frame from a dict of three equal-length columns, with the
# rows labelled 'q', 'w', 'e'.
column_data = {name: [0, 1, 2] for name in ('col1', 'col2', 'col3')}
df = pandas.DataFrame(data=column_data, index=['q', 'w', 'e'])
print(df)

# Passing a list of column names selects those columns as a sub-frame.
wanted_columns = ['col1', 'col2']
res = df[wanted_columns]
print(res)

# A standalone Series that uses the same three row labels.
se = pandas.Series(data=[1, 2, 3], index=['q', 'w', 'e'])
print(se)

# Print each basic attribute of the frame under its own label.
attribute_report = (
    ("df 的values:\n", df.values),
    ("df 的index:\n", df.index),
    ("df 的columns:\n", df.columns),
    ("df 的size:\n", df.size),
    ("df 的dtypes:\n", df.dtypes),
    ("df 的shape:\n", df.shape),
    ("df 的ndim:\n", df.ndim),
)
for label, value in attribute_report:
    print(label, value)
DataFrame对文件的操作
查看文件的编码
![在这里插入图片描述](https://img-blog.csdnimg.cn/20190911203229920.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3lid18yNTY5,size_16,color_FFFFFF,t_70)
![在这里插入图片描述](https://img-blog.csdnimg.cn/20190911203225400.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3lid18yNTY5,size_16,color_FFFFFF,t_70)
import pandas
import codecs

# 'ansi' is a codec alias that Python registers only on Windows; requesting it
# on any other platform raises LookupError. Probe for it once and fall back to
# an explicit code page so the reads below also work off Windows.
try:
    codecs.lookup('ansi')
except LookupError:
    # NOTE(review): assumes the CSV files were saved on a Simplified-Chinese
    # Windows system, where "ANSI" means GBK -- TODO confirm against the files.
    csv_encoding = 'gbk'
else:
    csv_encoding = 'ansi'

# read_table with an explicit comma separator.
data = pandas.read_table('data_file/meal_order_info.csv',encoding=csv_encoding,sep=',')
print(data)
# The same file again through read_csv, this time without passing sep.
data = pandas.read_csv('data_file/meal_order_info.csv',encoding=csv_encoding)
print(data)
print('-*'*60)
# sheet_name=0 selects the first worksheet of the workbook.
data = pandas.read_excel('data_file/meal_order_detail.xlsx',sheet_name=0)
print(data)
print('&*#'*50)
data = pandas.read_csv('data_file/ssss.csv',encoding=csv_encoding)
print(data)
DataFrame的增删改查
查
import pandas
# Load the order-detail workbook. No index_col is passed, so the rows keep
# the default integer labels that the slices below rely on.
data = pandas.read_excel('data_file/meal_order_detail.xlsx')
# The triple-quoted string below is a disabled demo: it is a bare string
# expression, so none of it runs. It shows "column first, then rows" selection
# with slices, head() and tail().
'''
# 数组是同时索引【行,列】
# dataframe的非同时索引方法,需要先取列再取行
# 获取detail_id 之后再获取 前5行
res = data['detail_id'][:5]
res1 = data['detail_id'][[0,1,2,3,4]]
res2 = data['detail_id'].head() # head默认获取前5行
print(res,'\n',res1,'\n',res2)
res3 = data['detail_id'].head(10) # 获取前10行
print('-*'*60)
print('-*'*60)
# 获取detail_id 之后再获取 后5行
res = data['detail_id'][-5:]
res1 = data['detail_id'].tail() # head默认获取后5行
print(res,'\n',res1)
res2 = data['detail_id'].tail(10) # 获取后10行
# 获取单列 之后再获取行数据,单行的名称或者下标,
# 多行的名称或者 下标
# 如果是单列,直接写名称,如果是多列,需要将多列的名称组成一个列表
# 不能使用列的下标,只能使用列的名称
res = data[['detail_id','order_id']].head()
print(res)
'''
# Label-based selection: .loc slices include both end labels.
res = data.loc[0:5,'detail_id':'dishes_id']
print(res)
# Position-based selection: .iloc slices exclude the stop position.
res = data.iloc[0:10,1:4]
print(res)
# This step originally used data.ix, which was deprecated in pandas 0.20 and
# removed in 1.0 (AttributeError on current versions). With integer row labels
# .ix resolved this slice by label, so .loc is the direct replacement.
res = data.loc[0:5,'detail_id':'dishes_id']
print(res)
增
import pandas
# Load the users workbook and show it before the change.
data = pandas.read_excel('data_file/users.xlsx')
print(data)

# Compute every row's age plus one, then store it as the 'new_age' column.
incremented_age = data.loc[:, 'age'] + 1
data.loc[:, 'new_age'] = incremented_age
print(data)
删
import pandas
# Load the users workbook and show it before anything is dropped.
data = pandas.read_excel('data_file/users.xlsx')
print(data)
separator = '-*'*60
print(separator)
print(separator)

# Drop two columns by name. drop() returns a new frame; `data` is untouched.
res = data.drop(columns=['sex', 'age'])
print(res)

# Drop the rows labelled 0 through 5.
res = data.drop(index=[0, 1, 2, 3, 4, 5])
print(res)

# Drop every row whose age is even: build the boolean mask, collect the
# labels of the matching rows, then drop those labels.
even_age_mask = data['age'] % 2 == 0
even_age_labels = data[even_age_mask].index
res = data.drop(index=even_age_labels)
print(res)
改
import pandas
# Load the users workbook and show it before any modification.
data = pandas.read_excel('data_file/users.xlsx')
print(data)
divider = '-*'*70

# Boolean mask of the rows whose 'sex' value equals '男' (male).
is_male = data['sex'] == '男'
print(is_male)
male_sex_values = data.loc[is_male, 'sex']
print(male_sex_values)
# Overwrite the matched cells with the literal string 'nan' (text, not a
# missing value).
data.loc[is_male, 'sex'] = 'nan'
print(data)
print(divider)

# Overwrite the whole 'sex' column with the string '0'.
data.loc[:, 'sex'] = '0'
print(data)
print(divider)

# Set every odd age to 0.
odd_age_mask = data['age'] % 2 != 0
data.loc[odd_age_mask, 'age'] = 0
print(data)
print(divider)

# Rename the organisation '统计班' (statistics class) to '0506'.
in_statistics_class = data['ORGANIZE_NAME'] == '统计班'
data.loc[in_statistics_class, 'ORGANIZE_NAME'] = '0506'
print(data)