pandas学习一
import pandas as pd
import numpy as np
# Build a Series from a plain Python list. No index is given, so pandas
# labels the entries 0..5; the NaN entry shows up as a missing value and
# the printed dtype is float64.
raw_values = [1, 3, 6, np.nan, 44, 1]
s = pd.Series(raw_values)
s
0 1.0
1 3.0
2 6.0
3 NaN
4 44.0
5 1.0
dtype: float64
# Six consecutive calendar days starting at 2020-02-24 (daily frequency,
# as shown by freq='D' in the output below).
date = pd.date_range(start='20200224', periods=6)
date
DatetimeIndex(['2020-02-24', '2020-02-25', '2020-02-26', '2020-02-27',
'2020-02-28', '2020-02-29'],
dtype='datetime64[ns]', freq='D')
# 6x4 frame of standard-normal random numbers: one row per entry of `date`,
# one column per label in a..d. The values differ on every run because the
# random generator is not seeded.
random_block = np.random.randn(6, 4)
column_labels = ['a', 'b', 'c', 'd']
df = pd.DataFrame(random_block, index=date, columns=column_labels)
df
| | a | b | c | d |
|---|---|---|---|---|
| 2020-02-24 | -0.626171 | 0.246824 | 0.165425 | 0.148035 |
| 2020-02-25 | 0.710622 | -1.679865 | -0.777670 | 0.968605 |
| 2020-02-26 | -0.858063 | -0.261376 | 1.132020 | -0.158369 |
| 2020-02-27 | -0.612111 | 0.081410 | -0.034667 | 1.355139 |
| 2020-02-28 | -0.631792 | -0.891452 | 0.488653 | 0.313943 |
| 2020-02-29 | -0.295801 | 0.246308 | 0.231300 | 0.240889 |
# Data type of each column; all four columns are float64 here.
df.dtypes
a float64
b float64
c float64
d float64
dtype: object
# Row labels of the frame: the DatetimeIndex that was passed as index= when df was built.
df.index
DatetimeIndex(['2020-02-24', '2020-02-25', '2020-02-26', '2020-02-27',
'2020-02-28', '2020-02-29'],
dtype='datetime64[ns]', freq='D')
# Column labels of the frame, returned as an Index object.
df.columns
Index(['a', 'b', 'c', 'd'], dtype='object')
# The frame's data as a 2-D NumPy array, without the row or column labels.
df.values
array([[-0.62617077, 0.24682398, 0.16542528, 0.14803493],
[ 0.71062227, -1.67986545, -0.77766962, 0.96860532],
[-0.85806291, -0.26137552, 1.13201976, -0.15836874],
[-0.61211132, 0.0814105 , -0.03466684, 1.35513929],
[-0.63179158, -0.89145187, 0.48865349, 0.31394313],
[-0.29580124, 0.24630751, 0.23130023, 0.24088918]])
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()
| | a | b | c | d |
|---|---|---|---|---|
| count | 6.000000 | 6.000000 | 6.000000 | 6.000000 |
| mean | -0.385553 | -0.376358 | 0.200844 | 0.478041 |
| std | 0.566177 | 0.769843 | 0.626907 | 0.566929 |
| min | -0.858063 | -1.679865 | -0.777670 | -0.158369 |
| 25% | -0.630386 | -0.733933 | 0.015356 | 0.171248 |
| 50% | -0.619141 | -0.089983 | 0.198363 | 0.277416 |
| 75% | -0.374879 | 0.205083 | 0.424315 | 0.804940 |
| max | 0.710622 | 0.246824 | 1.132020 | 1.355139 |
# Transpose first so that describe() summarizes each date (an original row)
# rather than each of the columns a..d.
df.transpose().describe()
| | 2020-02-24 00:00:00 | 2020-02-25 00:00:00 | 2020-02-26 00:00:00 | 2020-02-27 00:00:00 | 2020-02-28 00:00:00 | 2020-02-29 00:00:00 |
|---|---|---|---|---|---|---|
| count | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 4.000000 |
| mean | -0.016472 | -0.194577 | -0.036447 | 0.197443 | -0.180162 | 0.105674 |
| std | 0.408741 | 1.254121 | 0.837820 | 0.829252 | 0.683461 | 0.267722 |
| min | -0.626171 | -1.679865 | -0.858063 | -0.612111 | -0.891452 | -0.295801 |
| 25% | -0.045516 | -1.003219 | -0.410547 | -0.179028 | -0.696707 | 0.099525 |
| 50% | 0.156730 | -0.033524 | -0.209872 | 0.023372 | -0.158924 | 0.236095 |
| 75% | 0.185775 | 0.775118 | 0.164228 | 0.399843 | 0.357621 | 0.242244 |
| max | 0.246824 | 0.968605 | 1.132020 | 1.355139 | 0.488653 | 0.246308 |
# Sort along the column axis by label, in descending order (d, c, b, a);
# the rows keep their original order.
df.sort_index(axis='columns', ascending=False)
| | d | c | b | a |
|---|---|---|---|---|
| 2020-02-24 | 0.148035 | 0.165425 | 0.246824 | -0.626171 |
| 2020-02-25 | 0.968605 | -0.777670 | -1.679865 | 0.710622 |
| 2020-02-26 | -0.158369 | 1.132020 | -0.261376 | -0.858063 |
| 2020-02-27 | 1.355139 | -0.034667 | 0.081410 | -0.612111 |
| 2020-02-28 | 0.313943 | 0.488653 | -0.891452 | -0.631792 |
| 2020-02-29 | 0.240889 | 0.231300 | 0.246308 | -0.295801 |
# Reorder the rows by the values in column 'd', smallest first.
df.sort_values(by='d')
| | a | b | c | d |
|---|---|---|---|---|
| 2020-02-26 | -0.858063 | -0.261376 | 1.132020 | -0.158369 |
| 2020-02-24 | -0.626171 | 0.246824 | 0.165425 | 0.148035 |
| 2020-02-29 | -0.295801 | 0.246308 | 0.231300 | 0.240889 |
| 2020-02-28 | -0.631792 | -0.891452 | 0.488653 | 0.313943 |
| 2020-02-25 | 0.710622 | -1.679865 | -0.777670 | 0.968605 |
| 2020-02-27 | -0.612111 | 0.081410 | -0.034667 | 1.355139 |