% matplotlib inline
import pandas as pd
import numpy as np
import matplotlib. pyplot as plt
# Every (first, second) label pair, grouped by the first-level label:
# ("bar", "one"), ("bar", "two"), ("baz", "one"), ...
tuples = [
    (first, second)
    for first in ("bar", "baz", "foo", "qux")
    for second in ("one", "two")
]
tuples
[('bar', 'one'),
('bar', 'two'),
('baz', 'one'),
('baz', 'two'),
('foo', 'one'),
('foo', 'two'),
('qux', 'one'),
('qux', 'two')]
# Build a two-level MultiIndex from the label pairs; the levels are named
# "first" and "second".
index = pd. MultiIndex. from_tuples( tuples, names= [ "first" , "second" ] )
index
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
names=['first', 'second'])
# 8x2 frame of random normal samples: rows are labelled by the MultiIndex,
# columns are "A" and "B".
df = pd. DataFrame( np. random. randn( 8 , 2 ) , index= index, columns= [ "A" , "B" ] )
df
A B first second bar one -0.346858 -0.830402 two -0.529485 -0.857761 baz one 0.534911 -0.474648 two 2.071420 0.744212 foo one 2.373245 -0.031346 two -0.667792 0.161502 qux one -0.393459 1.073320 two -0.938606 1.267664
# stack() moves the column labels (A, B) into a new innermost index level,
# turning the frame into a Series with a three-level index.
stacked = df. stack( )
stacked
first second
bar one A -0.346858
B -0.830402
two A -0.529485
B -0.857761
baz one A 0.534911
B -0.474648
two A 2.071420
B 0.744212
foo one A 2.373245
B -0.031346
two A -0.667792
B 0.161502
qux one A -0.393459
B 1.073320
two A -0.938606
B 1.267664
dtype: float64
# unstack() undoes stack(): the innermost index level (A/B) becomes the
# columns again.
stacked. unstack( )
A B first second bar one -0.346858 -0.830402 two -0.529485 -0.857761 baz one 0.534911 -0.474648 two 2.071420 0.744212 foo one 2.373245 -0.031346 two -0.667792 0.161502 qux one -0.393459 1.073320 two -0.938606 1.267664
# A second unstack() also moves the "second" level into the columns, leaving
# "first" as the only row index and two-level column labels.
stacked. unstack( ) . unstack( )
A B second one two one two first bar -0.346858 -0.529485 -0.830402 -0.857761 baz 0.534911 2.071420 -0.474648 0.744212 foo 2.373245 -0.667792 -0.031346 0.161502 qux -0.393459 -0.938606 1.073320 1.267664
# Long-format sample data for the pivot-table example: three categorical key
# columns (A, B, C) and two columns of random normal values (D, E); 12 rows.
df = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 3,
        "B": list("ABC") * 4,
        "C": (["foo"] * 3 + ["bar"] * 3) * 2,
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    }
)
df
A B C D E 0 one A foo 0.053002 0.048254 1 one B foo 0.054017 0.591209 2 two C foo -2.160309 0.636266 3 three A bar -0.855437 0.407408 4 one B bar 1.614733 -0.072735 5 one C bar -1.324849 -0.362629 6 two A foo 0.671695 -0.289780 7 three B foo 1.645292 -1.043627 8 one C foo 0.322515 1.236447 9 one A bar 1.739135 1.563200 10 two B bar 1.087840 0.687841 11 three C bar 1.609056 0.081898
# Reshape column D into a table indexed by (A, B) with one column per value
# of C; (A, B, C) combinations that do not occur in df show up as NaN.
df. pivot_table( values= [ "D" ] , index= [ "A" , "B" ] , columns= [ "C" ] )
D C bar foo A B one A 1.739135 0.053002 B 1.614733 0.054017 C -1.324849 0.322515 three A -0.855437 NaN B NaN 1.645292 C 1.609056 NaN two A NaN 0.671695 B 1.087840 NaN C NaN -2.160309
# 600 consecutive one-second timestamps starting at midnight on 2016-03-01.
rng = pd.date_range(start="2016-03-01", periods=600, freq="s")
rng
DatetimeIndex(['2016-03-01 00:00:00', '2016-03-01 00:00:01',
'2016-03-01 00:00:02', '2016-03-01 00:00:03',
'2016-03-01 00:00:04', '2016-03-01 00:00:05',
'2016-03-01 00:00:06', '2016-03-01 00:00:07',
'2016-03-01 00:00:08', '2016-03-01 00:00:09',
...
'2016-03-01 00:09:50', '2016-03-01 00:09:51',
'2016-03-01 00:09:52', '2016-03-01 00:09:53',
'2016-03-01 00:09:54', '2016-03-01 00:09:55',
'2016-03-01 00:09:56', '2016-03-01 00:09:57',
'2016-03-01 00:09:58', '2016-03-01 00:09:59'],
dtype='datetime64[ns]', length=600, freq='S')
# One random integer (0 inclusive, 500 exclusive) per timestamp in rng.
s = pd. Series( np. random. randint( 0 , 500 , len ( rng) ) , index= rng)
s
2016-03-01 00:00:00 442
2016-03-01 00:00:01 435
2016-03-01 00:00:02 146
2016-03-01 00:00:03 213
2016-03-01 00:00:04 398
2016-03-01 00:00:05 275
2016-03-01 00:00:06 463
2016-03-01 00:00:07 338
2016-03-01 00:00:08 91
2016-03-01 00:00:09 252
2016-03-01 00:00:10 127
2016-03-01 00:00:11 203
2016-03-01 00:00:12 426
2016-03-01 00:00:13 142
2016-03-01 00:00:14 327
2016-03-01 00:00:15 458
2016-03-01 00:00:16 237
2016-03-01 00:00:17 474
2016-03-01 00:00:18 145
2016-03-01 00:00:19 159
2016-03-01 00:00:20 432
2016-03-01 00:00:21 494
2016-03-01 00:00:22 434
2016-03-01 00:00:23 107
2016-03-01 00:00:24 309
2016-03-01 00:00:25 486
2016-03-01 00:00:26 297
2016-03-01 00:00:27 97
2016-03-01 00:00:28 476
2016-03-01 00:00:29 163
...
2016-03-01 00:09:30 126
2016-03-01 00:09:31 424
2016-03-01 00:09:32 267
2016-03-01 00:09:33 340
2016-03-01 00:09:34 241
2016-03-01 00:09:35 62
2016-03-01 00:09:36 293
2016-03-01 00:09:37 311
2016-03-01 00:09:38 56
2016-03-01 00:09:39 224
2016-03-01 00:09:40 244
2016-03-01 00:09:41 28
2016-03-01 00:09:42 152
2016-03-01 00:09:43 97
2016-03-01 00:09:44 246
2016-03-01 00:09:45 391
2016-03-01 00:09:46 306
2016-03-01 00:09:47 12
2016-03-01 00:09:48 136
2016-03-01 00:09:49 266
2016-03-01 00:09:50 184
2016-03-01 00:09:51 308
2016-03-01 00:09:52 362
2016-03-01 00:09:53 393
2016-03-01 00:09:54 294
2016-03-01 00:09:55 152
2016-03-01 00:09:56 318
2016-03-01 00:09:57 368
2016-03-01 00:09:58 131
2016-03-01 00:09:59 131
Freq: S, dtype: int32
# Downsample the per-second series into 2-minute bins and total each bin.
# The `how=` keyword of resample() is deprecated; the aggregation is called
# on the resampler object instead.
s.resample("2Min").sum()
C:\Users\jxm\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...).sum()
if __name__ == '__main__':
2016-03-01 00:00:00 32423
2016-03-01 00:02:00 31627
2016-03-01 00:04:00 27891
2016-03-01 00:06:00 31000
2016-03-01 00:08:00 30084
Freq: 2T, dtype: int32
# Quarterly periods from Q1 2000 through Q1 2016, both ends included.
rng = pd.period_range(start="2000Q1", end="2016Q1", freq="Q")
rng
PeriodIndex(['2000Q1', '2000Q2', '2000Q3', '2000Q4', '2001Q1', '2001Q2',
'2001Q3', '2001Q4', '2002Q1', '2002Q2', '2002Q3', '2002Q4',
'2003Q1', '2003Q2', '2003Q3', '2003Q4', '2004Q1', '2004Q2',
'2004Q3', '2004Q4', '2005Q1', '2005Q2', '2005Q3', '2005Q4',
'2006Q1', '2006Q2', '2006Q3', '2006Q4', '2007Q1', '2007Q2',
'2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3', '2008Q4',
'2009Q1', '2009Q2', '2009Q3', '2009Q4', '2010Q1', '2010Q2',
'2010Q3', '2010Q4', '2011Q1', '2011Q2', '2011Q3', '2011Q4',
'2012Q1', '2012Q2', '2012Q3', '2012Q4', '2013Q1', '2013Q2',
'2013Q3', '2013Q4', '2014Q1', '2014Q2', '2014Q3', '2014Q4',
'2015Q1', '2015Q2', '2015Q3', '2015Q4', '2016Q1'],
dtype='period[Q-DEC]', freq='Q-DEC')
# Convert each quarterly period to a timestamp; as the output below shows,
# the result is the first day of each quarter.
rng. to_timestamp( )
DatetimeIndex(['2000-01-01', '2000-04-01', '2000-07-01', '2000-10-01',
'2001-01-01', '2001-04-01', '2001-07-01', '2001-10-01',
'2002-01-01', '2002-04-01', '2002-07-01', '2002-10-01',
'2003-01-01', '2003-04-01', '2003-07-01', '2003-10-01',
'2004-01-01', '2004-04-01', '2004-07-01', '2004-10-01',
'2005-01-01', '2005-04-01', '2005-07-01', '2005-10-01',
'2006-01-01', '2006-04-01', '2006-07-01', '2006-10-01',
'2007-01-01', '2007-04-01', '2007-07-01', '2007-10-01',
'2008-01-01', '2008-04-01', '2008-07-01', '2008-10-01',
'2009-01-01', '2009-04-01', '2009-07-01', '2009-10-01',
'2010-01-01', '2010-04-01', '2010-07-01', '2010-10-01',
'2011-01-01', '2011-04-01', '2011-07-01', '2011-10-01',
'2012-01-01', '2012-04-01', '2012-07-01', '2012-10-01',
'2013-01-01', '2013-04-01', '2013-07-01', '2013-10-01',
'2014-01-01', '2014-04-01', '2014-07-01', '2014-10-01',
'2015-01-01', '2015-04-01', '2015-07-01', '2015-10-01',
'2016-01-01'],
dtype='datetime64[ns]', freq='QS-OCT')
# Subtracting two Timestamps yields a Timedelta: 1 Feb 2016 to 1 Mar 2016 is
# 29 days because 2016 is a leap year.
pd. Timestamp( "20160301" ) - pd. Timestamp( "20160201" )
Timedelta('29 days 00:00:00')
# Six records with ids 1..6 and a raw letter grade each.
df = pd.DataFrame({"id": list(range(1, 7)), "raw_grade": list("abbaad")})
df
id raw_grade 0 1 a 1 2 b 2 3 b 3 4 a 4 5 a 5 6 d
# Add a "grade" column holding raw_grade converted to the categorical dtype.
df[ "grade" ] = df. raw_grade. astype( "category" )
df
id raw_grade grade 0 1 a a 1 2 b b 2 3 b b 3 4 a a 4 5 a a 5 6 d d
df. grade
0 a
1 b
2 b
3 a
4 a
5 d
Name: grade, dtype: category
Categories (3, object): [a, b, d]
df. grade. cat. categories
Index(['a', 'b', 'd'], dtype='object')
# Relabel the categories a / b / d as "very good" / "good" / "bad".
# Assigning to `.cat.categories` is deprecated and rejected by current pandas
# releases; rename_categories() returns a relabelled copy, which is stored
# back into the column.
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "bad"])
df
id raw_grade grade 0 1 a very good 1 2 b good 2 3 b good 3 4 a very good 4 5 a very good 5 6 d bad
# Sorting on a categorical column follows the category order rather than
# alphabetical order of the labels (see the output below).
df. sort_values( by= "grade" , ascending= True )
id raw_grade grade 0 1 a very good 3 4 a very good 4 5 a very good 1 2 b good 2 3 b good 5 6 d bad
# 1000 random normal samples indexed by consecutive days from 2000-01-01.
s = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start="2000-01-01", periods=1000),
)
s
2000-01-01 1.595426
2000-01-02 2.072600
2000-01-03 0.432756
2000-01-04 0.426376
2000-01-05 -0.491930
2000-01-06 0.201953
2000-01-07 -0.486838
2000-01-08 1.583397
2000-01-09 0.465889
2000-01-10 0.436978
2000-01-11 -0.597867
2000-01-12 1.402368
2000-01-13 1.066815
2000-01-14 1.070015
2000-01-15 0.413151
2000-01-16 -0.779842
2000-01-17 0.264469
2000-01-18 -0.842073
2000-01-19 0.797955
2000-01-20 -1.413759
2000-01-21 0.434063
2000-01-22 -1.448270
2000-01-23 -0.550196
2000-01-24 1.007455
2000-01-25 1.282003
2000-01-26 0.567412
2000-01-27 0.188337
2000-01-28 -0.989570
2000-01-29 0.348961
2000-01-30 0.660422
...
2002-08-28 0.851649
2002-08-29 0.758766
2002-08-30 1.762871
2002-08-31 2.340199
2002-09-01 -1.040113
2002-09-02 1.759316
2002-09-03 0.676174
2002-09-04 -0.837802
2002-09-05 2.193853
2002-09-06 -0.508849
2002-09-07 2.170355
2002-09-08 0.153670
2002-09-09 -0.587198
2002-09-10 0.313317
2002-09-11 0.523073
2002-09-12 -1.119825
2002-09-13 -0.451295
2002-09-14 -0.389207
2002-09-15 -0.863233
2002-09-16 -0.564046
2002-09-17 0.849943
2002-09-18 0.201502
2002-09-19 -1.145370
2002-09-20 0.808674
2002-09-21 -1.190614
2002-09-22 0.259530
2002-09-23 0.973751
2002-09-24 -0.937229
2002-09-25 0.181833
2002-09-26 1.301469
Freq: D, dtype: float64
# Replace s with its running total.
s = s. cumsum( )
s
2000-01-01 1.595426
2000-01-02 3.668026
2000-01-03 4.100782
2000-01-04 4.527158
2000-01-05 4.035227
2000-01-06 4.237181
2000-01-07 3.750342
2000-01-08 5.333739
2000-01-09 5.799628
2000-01-10 6.236605
2000-01-11 5.638738
2000-01-12 7.041106
2000-01-13 8.107922
2000-01-14 9.177936
2000-01-15 9.591087
2000-01-16 8.811245
2000-01-17 9.075714
2000-01-18 8.233641
2000-01-19 9.031596
2000-01-20 7.617836
2000-01-21 8.051899
2000-01-22 6.603629
2000-01-23 6.053433
2000-01-24 7.060888
2000-01-25 8.342891
2000-01-26 8.910303
2000-01-27 9.098640
2000-01-28 8.109071
2000-01-29 8.458032
2000-01-30 9.118453
...
2002-08-28 83.281560
2002-08-29 84.040326
2002-08-30 85.803197
2002-08-31 88.143396
2002-09-01 87.103282
2002-09-02 88.862598
2002-09-03 89.538772
2002-09-04 88.700970
2002-09-05 90.894822
2002-09-06 90.385973
2002-09-07 92.556328
2002-09-08 92.709998
2002-09-09 92.122800
2002-09-10 92.436118
2002-09-11 92.959191
2002-09-12 91.839366
2002-09-13 91.388071
2002-09-14 90.998864
2002-09-15 90.135631
2002-09-16 89.571585
2002-09-17 90.421528
2002-09-18 90.623030
2002-09-19 89.477660
2002-09-20 90.286334
2002-09-21 89.095720
2002-09-22 89.355250
2002-09-23 90.329001
2002-09-24 89.391773
2002-09-25 89.573606
2002-09-26 90.875075
Freq: D, dtype: float64
# Plot the cumulative series; the call returns the matplotlib Axes shown in
# the output below.
s. plot( )
<matplotlib.axes._subplots.AxesSubplot at 0x2a5483a9550>
![png](output_25_1.png)
# 100 rows of random normal samples in four columns named A to D.
df = pd.DataFrame(np.random.randn(100, 4), columns=["A", "B", "C", "D"])
df
A B C D 0 0.125103 -0.047520 -0.240399 2.501893 1 -1.019431 1.333626 -0.386758 1.227110 2 -1.187772 0.277602 1.039957 -0.324204 3 -0.988399 -0.382832 0.196023 1.422645 4 0.162866 0.284484 -0.481481 -0.064925 5 -0.252982 0.101673 -0.363448 -0.577417 6 -1.329524 -0.061770 2.485569 -0.148556 7 -0.323760 1.893606 0.291476 1.205642 8 -1.099546 -0.402216 2.254553 1.036753 9 -0.483892 -0.065015 1.644008 -1.079580 10 0.120246 -1.068981 -1.976976 -0.220611 11 -0.370101 -2.034787 -0.130744 1.085791 12 1.631983 -0.301852 1.144025 -0.331214 13 -1.503589 -0.582584 0.002299 1.436808 14 -0.161954 0.697404 0.838918 1.382634 15 -0.005843 0.956195 -1.408816 0.857727 16 -1.375363 0.778555 1.075712 -0.476712 17 1.565101 -0.051421 1.259082 0.599417 18 -0.793296 -0.431271 -1.074618 0.221511 19 0.574572 -0.330526 0.822301 2.243612 20 -0.218847 -0.200872 0.559055 0.444604 21 0.079354 0.490181 -0.971043 -1.298320 22 -0.339898 0.974481 -1.562591 -0.120771 23 1.039722 1.466950 -0.134092 1.798354 24 0.199116 0.214173 0.135694 0.407160 25 0.021932 -0.880740 -1.832740 -1.670625 26 -0.013008 1.195136 0.694926 -0.025821 27 1.074290 0.058380 0.280012 -0.473543 28 -0.504402 1.063999 -1.866300 -0.731570 29 -0.468862 0.295581 -0.349099 0.270034 ... ... ... ... ... 
70 -0.102983 1.273362 0.601778 -1.255539 71 1.059208 0.095890 -0.666990 1.353239 72 0.330271 -1.465155 -0.454793 -0.645915 73 0.612590 0.857497 -0.811133 0.854238 74 -0.534125 0.407952 -0.242628 -0.442530 75 0.103592 0.827345 -0.124301 1.002852 76 0.013706 -0.335187 -0.972141 -2.414027 77 0.742050 -1.079103 -1.355777 0.459921 78 2.209496 -0.525411 1.130483 -1.104203 79 0.432017 -0.613601 -0.889516 0.226079 80 1.155052 -0.220674 -1.052016 0.599290 81 -0.987367 1.030958 -0.056030 -0.951313 82 -0.464277 -0.802986 0.176342 0.935201 83 0.042171 0.673935 0.871507 0.112901 84 -1.176412 -1.148488 0.983801 -3.525204 85 -0.396431 1.791114 2.092050 -1.113374 86 1.438362 0.216573 0.054065 -0.426881 87 -1.161840 -2.166739 1.381352 1.749754 88 1.738967 -1.315516 0.531161 -1.312078 89 0.093613 0.360908 -0.760932 -0.527739 90 -0.359409 1.291480 -0.253727 -0.209233 91 -0.823306 -0.136120 1.041966 0.540554 92 0.784981 -0.388069 -0.683943 -1.201355 93 1.523748 -0.020033 0.909827 1.477140 94 -0.842034 2.138460 1.977222 0.337255 95 2.690039 1.219984 -2.123012 1.098065 96 0.119340 1.097970 -0.146863 -0.866127 97 -1.474510 0.491075 0.710219 -0.559791 98 -0.005396 0.579149 0.388045 0.458575 99 -1.277794 2.154093 -0.642422 -0.728245
100 rows × 4 columns
# Write df to "data.csv" (relative path, so it lands in the current working
# directory).
df. to_csv( "data.csv" )
% ls
驱动器 C 中的卷是 Windows
卷的序列号是 9C4B-695D
C:\Users\jxm 的目录
2019/11/17 11:22 <DIR> .
2019/11/17 11:22 <DIR> ..
2019/10/24 13:03 <DIR> .anaconda
2019/06/24 21:03 <DIR> .android
2019/10/24 14:03 <DIR> .astropy
2019/10/24 20:37 66 .condarc
2019/10/24 13:04 <DIR> .continuum
2019/11/17 10:25 <DIR> .ipynb_checkpoints
2019/11/16 19:52 <DIR> .ipython
2019/11/16 20:12 <DIR> .jupyter
2019/11/17 10:27 <DIR> .matplotlib
2019/10/21 14:46 <DIR> .oracle_jre_usage
2019/10/21 17:18 <DIR> .PyCharmCE2019.2
2019/11/13 22:24 <DIR> 3D Objects
2019/11/16 19:50 <DIR> Anaconda3
2019/05/07 15:03 <DIR> AppData
2019/11/13 22:24 <DIR> Contacts
2019/11/17 11:22 8,254 data.csv
2019/11/16 20:33 581 demo_1.ipynb
2019/11/15 13:22 <DIR> Desktop
2019/11/13 22:24 <DIR> Documents
2019/11/13 22:24 <DIR> Downloads
2019/11/14 19:28 <DIR> Favorites
2019/11/13 22:24 <DIR> Links
2019/11/13 22:24 <DIR> Music
2019/11/16 09:47 <DIR> OneDrive
2019/11/17 10:20 59,291 pandas__11.ipynb
2019/11/17 11:21 60,369 pandas_1117.ipynb
2019/11/13 22:24 <DIR> Pictures
2019/10/21 20:04 <DIR> PycharmProjects
2019/04/02 14:48 <DIR> Roaming
2019/11/13 22:24 <DIR> Saved Games
2019/11/13 22:24 <DIR> Searches
2019/06/22 20:08 1 status.bin
2019/11/16 20:35 581 Untitled.ipynb
2019/11/13 22:24 <DIR> Videos
7 个文件 129,143 字节
29 个目录 41,854,242,816 可用字节
% more data. csv
# Load the file back; index_col=0 uses the first CSV column as the row index
# instead of treating it as data.
pd. read_csv( "data.csv" , index_col= 0 )
A B C D 0 0.125103 -0.047520 -0.240399 2.501893 1 -1.019431 1.333626 -0.386758 1.227110 2 -1.187772 0.277602 1.039957 -0.324204 3 -0.988399 -0.382832 0.196023 1.422645 4 0.162866 0.284484 -0.481481 -0.064925 5 -0.252982 0.101673 -0.363448 -0.577417 6 -1.329524 -0.061770 2.485569 -0.148556 7 -0.323760 1.893606 0.291476 1.205642 8 -1.099546 -0.402216 2.254553 1.036753 9 -0.483892 -0.065015 1.644008 -1.079580 10 0.120246 -1.068981 -1.976976 -0.220611 11 -0.370101 -2.034787 -0.130744 1.085791 12 1.631983 -0.301852 1.144025 -0.331214 13 -1.503589 -0.582584 0.002299 1.436808 14 -0.161954 0.697404 0.838918 1.382634 15 -0.005843 0.956195 -1.408816 0.857727 16 -1.375363 0.778555 1.075712 -0.476712 17 1.565101 -0.051421 1.259082 0.599417 18 -0.793296 -0.431271 -1.074618 0.221511 19 0.574572 -0.330526 0.822301 2.243612 20 -0.218847 -0.200872 0.559055 0.444604 21 0.079354 0.490181 -0.971043 -1.298320 22 -0.339898 0.974481 -1.562591 -0.120771 23 1.039722 1.466950 -0.134092 1.798354 24 0.199116 0.214173 0.135694 0.407160 25 0.021932 -0.880740 -1.832740 -1.670625 26 -0.013008 1.195136 0.694926 -0.025821 27 1.074290 0.058380 0.280012 -0.473543 28 -0.504402 1.063999 -1.866300 -0.731570 29 -0.468862 0.295581 -0.349099 0.270034 ... ... ... ... ... 
70 -0.102983 1.273362 0.601778 -1.255539 71 1.059208 0.095890 -0.666990 1.353239 72 0.330271 -1.465155 -0.454793 -0.645915 73 0.612590 0.857497 -0.811133 0.854238 74 -0.534125 0.407952 -0.242628 -0.442530 75 0.103592 0.827345 -0.124301 1.002852 76 0.013706 -0.335187 -0.972141 -2.414027 77 0.742050 -1.079103 -1.355777 0.459921 78 2.209496 -0.525411 1.130483 -1.104203 79 0.432017 -0.613601 -0.889516 0.226079 80 1.155052 -0.220674 -1.052016 0.599290 81 -0.987367 1.030958 -0.056030 -0.951313 82 -0.464277 -0.802986 0.176342 0.935201 83 0.042171 0.673935 0.871507 0.112901 84 -1.176412 -1.148488 0.983801 -3.525204 85 -0.396431 1.791114 2.092050 -1.113374 86 1.438362 0.216573 0.054065 -0.426881 87 -1.161840 -2.166739 1.381352 1.749754 88 1.738967 -1.315516 0.531161 -1.312078 89 0.093613 0.360908 -0.760932 -0.527739 90 -0.359409 1.291480 -0.253727 -0.209233 91 -0.823306 -0.136120 1.041966 0.540554 92 0.784981 -0.388069 -0.683943 -1.201355 93 1.523748 -0.020033 0.909827 1.477140 94 -0.842034 2.138460 1.977222 0.337255 95 2.690039 1.219984 -2.123012 1.098065 96 0.119340 1.097970 -0.146863 -0.866127 97 -1.474510 0.491075 0.710219 -0.559791 98 -0.005396 0.579149 0.388045 0.458575 99 -1.277794 2.154093 -0.642422 -0.728245
100 rows × 4 columns