一,dataframe按索引查看数据
1,使用行、列索引直接查看 (注意:先列后行!)
import numpy as np
import pandas as pd
# Fix the seed so the simulated data is identical on every run.
np.random.seed(2)
# Simulated values drawn from a normal distribution with mean 0 and std 1:
# 500 rows (one per stock) by 507 columns (one per date).
day_data = np.random.normal(loc=0, scale=1, size=(500, 507))
row_count, column_count = day_data.shape
# Row labels: "股票0", "股票1", ...
stock_list = [f"股票{i}" for i in range(row_count)]
# Column labels: business-day dates starting at 2018-01-01.
date = pd.date_range(start="2018-01-01", periods=column_count, freq="B")
# Wrap the raw array in a DataFrame that carries both sets of labels.
day_data2 = pd.DataFrame(data=day_data, index=stock_list, columns=date)
# Direct [] indexing goes COLUMN first, then row.
first_day = day_data2['2018-01-01']
print(first_day['股票0'])
# Several columns can be selected at once by passing a list of column labels
# to [] instead of a single label.
2,使用loc、iloc、ix按索引查看数据
loc(通过索引名字),iloc(通过索引下标),ix(通过索引名字+索引下标;注意:ix自pandas 0.20起已弃用,并在pandas 1.0中被移除,新代码请改用loc或iloc)
import numpy as np
import pandas as pd
# Fix the seed so the simulated data is identical on every run.
np.random.seed(2)
day_data = np.random.normal(0, 1, (500, 507))
# Row labels: one per stock.
stock_list = ["股票" + str(i) for i in range(day_data.shape[0])]
# Column labels: one per day.
date = ["第" + str(i) + "天" for i in range(day_data.shape[1])]
# Attach the row and column labels.
day_data2 = pd.DataFrame(day_data, index=stock_list, columns=date)
# loc selects by LABEL; a label slice includes both of its endpoints.
# print(day_data2.loc[["股票0", "股票2"], ["第0天", "第1天"]])
# iloc selects by integer POSITION; unlike loc, a slice excludes its stop.
# print(day_data2.iloc[0:2, 0:2])
# Mixed selection (rows by position, columns by label): `.ix` used to allow
# this but was removed in pandas 1.0, so turn the row positions into labels
# via the index and do a single label-based lookup with loc.
selected = day_data2.loc[day_data2.index[[1, 2]], ["第0天", "第1天"]]
print(selected)
3,不支持的操作
# Wrong: direct [] indexing goes column first, then row, so putting the row
# label first is not a valid lookup.
# NOTE(review): `data` is not defined in this file; presumably it stands for
# any DataFrame and the two names are placeholders for row/column labels.
data[行索引][列索引]
# Wrong: NumPy-style "rows, columns" slicing is not accepted by [] on a
# DataFrame; positional slicing of both axes goes through iloc instead.
data[:1,:2]
二,dataframe按索引修改数据
1,使用行、列索引直接修改 (注意:先列后行!)
# encoding=utf-8
import numpy as np
import pandas as pd
# Fix the seed so the simulated data is identical on every run.
np.random.seed(2)
day_data = np.random.normal(0, 1, (500, 507))
# Row labels: one per stock.
stock_list = ["股票" + str(i) for i in range(day_data.shape[0])]
# Column labels: one per day.
date = ["第" + str(i) + "天" for i in range(day_data.shape[1])]
# Attach the row and column labels.
day_data2 = pd.DataFrame(day_data, index=stock_list, columns=date)
# To change a single cell, address row and column in ONE indexing call:
#     day_data2.loc["股票0", "第0天"] = np.nan
# The chained form day_data2["第0天"]["股票0"] = ... may assign into a
# temporary object and leave day_data2 untouched.
# Overwrite a whole column with NaN. Bracket assignment is used instead of
# attribute assignment (day_data2.第0天 = ...) because the attribute form
# only reaches the column when it already exists; otherwise it silently
# creates a plain Python attribute on the object.
day_data2["第0天"] = np.nan
print(day_data2)
# Remove whole columns; drop returns a new frame, so rebind the name.
day_data2 = day_data2.drop(columns=["第0天", "第1天"])
print(day_data2)
2,使用loc、iloc按索引修改数据
import numpy as np
import pandas as pd
# Fix the seed so the simulated data is identical on every run.
np.random.seed(2)
day_data = np.random.normal(0, 1, (500, 507))
# Row labels: one per stock.
stock_list = ["股票" + str(i) for i in range(day_data.shape[0])]
# Column labels: one per day.
date = ["第" + str(i) + "天" for i in range(day_data.shape[1])]
# Attach the row and column labels.
day_data2 = pd.DataFrame(day_data, index=stock_list, columns=date)
# loc assigns by LABEL; a label slice includes both of its endpoints.
# day_data2.loc[["股票0", "股票2"], ["第0天", "第1天"]] = 0
# iloc assigns by integer POSITION; unlike loc, a slice excludes its stop.
# day_data2.iloc[0:2, 0:2] = 0
# Mixed assignment (rows by position, columns by label): `.ix` used to allow
# this but was removed in pandas 1.0, so turn the row positions into labels
# via the index and assign through a single loc call.
day_data2.loc[day_data2.index[[1, 2]], ["第0天", "第1天"]] = 0
3,pandas的字符串方法
import pandas as pd
from sqlalchemy import create_engine
# Engine for the "yoyo" MySQL database on localhost:3306 via the PyMySQL
# driver. NOTE(review): the user name and password are hard-coded in the URL.
engine = create_engine('mysql+pymysql://root:root@localhost:3306/yoyo')
# Read every row of the role_info table into a DataFrame.
sql = """select * from role_info;"""
df = pd.read_sql(sql, con=engine)
print(df.dtypes)
# The .str accessor applies the string method to each element of the column;
# here every role_pic value is split on "/" into a list of parts.
role_pic_parts = df['role_pic'].str.split('/')
print(role_pic_parts.tolist())
4,把int类型列中的元素赋值为nan不会报错,因为pandas会自动将该列由int向上转换为float类型(见下面输出中的x列)。
import pandas as pd
import numpy as np
# 3x4 frame holding the integers 0..11, rows labelled a-c, columns w-z.
temp = pd.DataFrame(
    data=np.arange(12).reshape(3, 4),
    index=["a", "b", "c"],
    columns=["w", "x", "y", "z"],
)
# Every column starts out with an integer dtype.
print(temp.dtypes)
# Writing NaN into the cell at row position 1 / column position 1 does not
# raise: column "x" is converted to float64 so that it can hold the NaN.
temp.iloc[1, 1] = np.nan
# Only column "x" has changed dtype; the other columns are still integers.
print(temp.dtypes)
#-------------output(整数位宽与平台有关:此处为int32,在多数64位Linux/macOS环境下为int64)---------------------------------------
w int32
x int32
y int32
z int32
dtype: object
w int32
x float64
y int32
z int32
dtype: object