查询api说明:
help(pd.Series.loc)
创建dataframe并用部分或者全部数据填充:
data = {'Country': ['Belgium', 'India', 'Brazil'],
'Capital': ['Brussels', 'New Delhi', 'Brasilia'],
'Population': [11190846, 1303171035, 207847528]}
df = pd.DataFrame(data,columns=['Country', 'Capital'])
df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], index=[7, 8, 9], columns=['max_speed', 'shield'])
tuples = [
('cobra', 'mark i', 'name'), ('cobra', 'mark i', 'name1'), ('cobra', 'mark ii', 'name'),
('sidewinder', 'mark i', 'name'), ('sidewinder', 'mark ii', 'name'),
('viper', 'mark ii', 'name'), ('viper', 'mark iii', 'name')
]
index = pd.MultiIndex.from_tuples(tuples)
values = [[12, 2, 'a'], [12, 2, 'a'], [0, 4, 'a'], [10, 20, 'a'],
[1, 4, 'a'], [7, 1, 'a'], [16, 36, 'a']]
df = pd.DataFrame(values, columns=['max_speed', 'shield', 'name'], index=index)
从csv导入数据:
pd.read_csv('file.csv', header=None, nrows=5)
从excel导入数据:
pd.read_excel('filepath', sheet_name='Sheet1')
从数据库导入数据:
import pymysql
from sqlalchemy import create_engine
pymysql.install_as_MySQLdb()
engine = create_engine(config.DB_CONN_STR, echo=True)
pd.read_sql_query("SELECT * FROM table", engine)
从 json字符串导入数据:
pd.read_json(jsonString)
从剪贴板导入数据:
pd.read_clipboard()
导出数据:
df.to_csv(file_name)
df.to_excel(file_name)
df.to_sql(tableName, connectionObject)
df.to_json(file_name)
查看dataframe结构:
df.shape
df.index
df.columns
df.info()
按列读取datafame数据:
df['Capital']
按行读取datafame数据:
df.head(1)
df[1:2] #return datafame
df.iloc[1] # return series
df.loc[df['Capital'] == 'Brussels']
df[df['Population']>1200000000]
df.tail(1)
按单元格读取dataframe数据:
df.iloc[1][1]
df.iat[1, 1]
df.loc[df['Capital'] == 'Brussels', ['Country']]
df.loc[lambda df: df['Capital'] == 'Brussels', ['Country']]
df.loc[1, 'Country']
dataframe排序:
df.sort_values(by='Population', ascending=False, na_position='first', key=lambda col: col)
修改dataframe数据:
df.rename(columns = {‘oldName’: ‘newName’})
df.columns = ['x', 'y', 'z'] #rename all in sequence
df['new_col'] = 0 #添加列
df.drop(columns=['new_col'], inplace=True) #删除列
df = df.append(df.sum(), ignore_index=True) #添加行
df.drop([3], inplace=True) #删除行
df.iat[1, 1]='Changed' #修改单元格
统计汇总:
df.sum() #求和
df.cumsum() #累加求和
df.min() #最小值
df.max() #最大值
df.describe()
df.mean() #平均数
df.median() #中位数
df.groupby(['Country', 'Capital']).sum())
df.count()
NA处理:
pd.isnull()
pd.notnull()
df.dropna()
df.dropna(axis=1)
df.fillna(n)
apply:
f = lambda x: [1,2,3]
df = df.apply(f, axis=1, result_type='broadcast')
df = df.applymap(f)