10 分钟入门 pandas (10 Minutes to pandas)

# pandas and numpy are used by the cells below.
import pandas as pd
import numpy as np
# NOTE(review): the alias here is `pltx`, not the conventional `plt`; none of the
# visible cells reference it — confirm whether this is a typo before renaming.
import matplotlib.pyplot as pltx

In [3]:

# Six consecutive calendar days starting 2013-01-01 (daily frequency);
# used further down as the row index of `df`.
dates = pd.date_range(start='20130101', periods=6, freq='D')
dates

Out[3]:

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:

# Build a 6x4 frame of standard-normal draws, indexed by `dates`, with columns A-D.
# The draws come from a locally seeded generator so that Restart & Run All
# rebuilds the same frame every time without touching NumPy's global random state.
# The numbers printed in this document came from an unseeded run and will not match.
df = pd.DataFrame(
    np.random.RandomState(seed=0).randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df

Out[4]:

  A B C D
2013-01-01 0.043660 0.914219 1.364281 0.960460
2013-01-02 0.245818 0.582317 0.456372 -0.734680
2013-01-03 -0.997398 -0.476202 0.967015 0.089730
2013-01-04 -1.132148 0.867161 0.458086 0.797743
2013-01-05 -1.226727 1.524988 -1.980305 0.694533
2013-01-06 1.695086 0.796078 -0.688947 -0.910752

In [5]:

# One column per construction style: a float scalar, a Timestamp, a float32
# Series, an int32 array, a Categorical and a plain string. The scalar entries
# are repeated for each of the four rows supplied by the array-like columns.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train"] * 2),
        'F': 'foo',
    }
)
df2

Out[5]:

  A B C D E F
0 1.0 2013-01-02 1.0 3 test foo
1 1.0 2013-01-02 1.0 3 train foo
2 1.0 2013-01-02 1.0 3 test foo
3 1.0 2013-01-02 1.0 3 train foo

In [6]:

# Show the dtype of every column: each column of df2 keeps the dtype it was built with.
df2.dtypes

Out[6]:

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [11]:

# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Out[11]:

  A B C D
count 6.000000 6.000000 6.000000 6.000000
mean -0.228618 0.701427 0.096084 0.149506
std 1.131678 0.657413 1.229257 0.810483
min -1.226727 -0.476202 -1.980305 -0.910752
25% -1.098461 0.635757 -0.402617 -0.528577
50% -0.476869 0.831620 0.457229 0.392131
75% 0.195279 0.902454 0.839783 0.771940
max 1.695086 1.524988 1.364281 0.960460

In [12]:

# Transpose: the column labels A-D become the rows and the dates become the columns.
df.T

Out[12]:

  2013-01-01 00:00:00 2013-01-02 00:00:00 2013-01-03 00:00:00 2013-01-04 00:00:00 2013-01-05 00:00:00 2013-01-06 00:00:00
A 0.043660 0.245818 -0.997398 -1.132148 -1.226727 1.695086
B 0.914219 0.582317 -0.476202 0.867161 1.524988 0.796078
C 1.364281 0.456372 0.967015 0.458086 -1.980305 -0.688947
D 0.960460 -0.734680 0.089730 0.797743 0.694533 -0.910752

In [13]:

# Sort along the column axis by label, in descending order (D, C, B, A);
# the rows keep their original date order.
df.sort_index(axis='columns', ascending=False)

Out[13]:

  D C B A
2013-01-01 0.960460 1.364281 0.914219 0.043660
2013-01-02 -0.734680 0.456372 0.582317 0.245818
2013-01-03 0.089730 0.967015 -0.476202 -0.997398
2013-01-04 0.797743 0.458086 0.867161 -1.132148
2013-01-05 0.694533 -1.980305 1.524988 -1.226727
2013-01-06 -0.910752 -0.688947 0.796078 1.695086

In [14]:

# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B')

Out[14]:

  A B C D
2013-01-03 -0.997398 -0.476202 0.967015 0.089730
2013-01-02 0.245818 0.582317 0.456372 -0.734680
2013-01-06 1.695086 0.796078 -0.688947 -0.910752
2013-01-04 -1.132148 0.867161 0.458086 0.797743
2013-01-01 0.043660 0.914219 1.364281 0.960460
2013-01-05 -1.226727 1.524988 -1.980305 0.694533

In [15]:

# Apply np.cumsum to each column: every cell becomes the running total of its
# column from the first date down to that row.
df.apply(np.cumsum)

Out[15]:

  A B C D
2013-01-01 0.043660 0.914219 1.364281 0.960460
2013-01-02 0.289478 1.496535 1.820653 0.225781
2013-01-03 -0.707920 1.020334 2.787668 0.315511
2013-01-04 -1.840068 1.887495 3.245754 1.113254
2013-01-05 -3.066794 3.412483 1.265449 1.807786
2013-01-06 -1.371708 4.208561 0.576502 0.897035

In [16]:

# Per-column spread: the lambda receives one column at a time and returns its
# maximum minus its minimum, so the result is a Series indexed by column label.
df.apply(lambda x: x.max() - x.min())

Out[16]:

A    2.921813
B    2.001190
C    3.344586
D    1.871212
dtype: float64

In [18]:

# Ten integers drawn uniformly from 0..6 (the upper bound 7 is exclusive).
# They come from a locally seeded generator so the Series is identical on every
# fresh run and NumPy's global random state is left alone. The values printed in
# this document came from an unseeded run and will not match.
s = pd.Series(np.random.RandomState(seed=0).randint(0, 7, size=10))
s

Out[18]:

0    3
1    3
2    3
3    4
4    3
5    1
6    6
7    3
8    1
9    4
dtype: int32

In [19]:

# Tally how often each distinct value occurs in `s`; the most frequent value is listed first.
s.value_counts()

Out[19]:

3    5
4    2
1    2
6    1
dtype: int64

In [20]:

# Mixed-case strings with one missing value, to show that the vectorised
# `.str` accessor lower-cases the text entries and leaves NaN as NaN.
s = pd.Series(
    [
        'A', 'B', 'C',
        'Aaba', 'Baca', np.nan,
        'CABA', 'dog', 'cat',
    ]
)
s.str.lower()

Out[20]:

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object
  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值