pandas_1117

%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Pair every outer label with its inner label: eight (first, second) tuples.
tuples = list(
    zip(
        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    )
)
tuples
[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]
# Two-level index built from the pairs; the levels are named "first" and "second".
index = pd.MultiIndex.from_tuples(
    tuples,
    names=["first", "second"],
)
index
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])
# 8x2 frame of standard-normal draws, one row per (first, second) pair.
df = pd.DataFrame(
    data=np.random.randn(8, 2),
    index=index,
    columns=list("AB"),
)
df
                     A         B
first second
bar   one    -0.346858 -0.830402
      two    -0.529485 -0.857761
baz   one     0.534911 -0.474648
      two     2.071420  0.744212
foo   one     2.373245 -0.031346
      two    -0.667792  0.161502
qux   one    -0.393459  1.073320
      two    -0.938606  1.267664
# Pivot the column labels (A, B) into a new innermost index level.
stacked = df.stack()
stacked
first  second   
bar    one     A   -0.346858
               B   -0.830402
       two     A   -0.529485
               B   -0.857761
baz    one     A    0.534911
               B   -0.474648
       two     A    2.071420
               B    0.744212
foo    one     A    2.373245
               B   -0.031346
       two     A   -0.667792
               B    0.161502
qux    one     A   -0.393459
               B    1.073320
       two     A   -0.938606
               B    1.267664
dtype: float64
# Undo the stack: the innermost index level (A/B) becomes the columns again.
stacked.unstack()
                     A         B
first second
bar   one    -0.346858 -0.830402
      two    -0.529485 -0.857761
baz   one     0.534911 -0.474648
      two     2.071420  0.744212
foo   one     2.373245 -0.031346
      two    -0.667792  0.161502
qux   one    -0.393459  1.073320
      two    -0.938606  1.267664
# Unstack twice: after A/B, the "second" level also moves into the columns.
stacked.unstack().unstack()
               A                   B
second       one       two       one       two
first
bar    -0.346858 -0.529485 -0.830402 -0.857761
baz     0.534911  2.071420 -0.474648  0.744212
foo     2.373245 -0.667792 -0.031346  0.161502
qux    -0.393459 -0.938606  1.073320  1.267664
# Three label columns (A, B, C) plus two columns of standard-normal draws.
df = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 3,
        "B": ["A", "B", "C"] * 4,
        "C": (["foo"] * 3 + ["bar"] * 3) * 2,
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    }
)
df
        A  B    C         D         E
0     one  A  foo  0.053002  0.048254
1     one  B  foo  0.054017  0.591209
2     two  C  foo -2.160309  0.636266
3   three  A  bar -0.855437  0.407408
4     one  B  bar  1.614733 -0.072735
5     one  C  bar -1.324849 -0.362629
6     two  A  foo  0.671695 -0.289780
7   three  B  foo  1.645292 -1.043627
8     one  C  foo  0.322515  1.236447
9     one  A  bar  1.739135  1.563200
10    two  B  bar  1.087840  0.687841
11  three  C  bar  1.609056  0.081898
# Aggregate D (pivot_table's default aggregation) for each (A, B) pair,
# with one output column per distinct value of C.
pd.pivot_table(
    df,
    values=["D"],
    index=["A", "B"],
    columns=["C"],
)
                D
C             bar       foo
A     B
one   A  1.739135  0.053002
      B  1.614733  0.054017
      C -1.324849  0.322515
three A -0.855437       NaN
      B       NaN  1.645292
      C  1.609056       NaN
two   A       NaN  0.671695
      B  1.087840       NaN
      C       NaN -2.160309
# 600 consecutive one-second timestamps starting at midnight on 2016-03-01.
rng = pd.date_range(start="20160301", periods=600, freq="s")
rng
DatetimeIndex(['2016-03-01 00:00:00', '2016-03-01 00:00:01',
               '2016-03-01 00:00:02', '2016-03-01 00:00:03',
               '2016-03-01 00:00:04', '2016-03-01 00:00:05',
               '2016-03-01 00:00:06', '2016-03-01 00:00:07',
               '2016-03-01 00:00:08', '2016-03-01 00:00:09',
               ...
               '2016-03-01 00:09:50', '2016-03-01 00:09:51',
               '2016-03-01 00:09:52', '2016-03-01 00:09:53',
               '2016-03-01 00:09:54', '2016-03-01 00:09:55',
               '2016-03-01 00:09:56', '2016-03-01 00:09:57',
               '2016-03-01 00:09:58', '2016-03-01 00:09:59'],
              dtype='datetime64[ns]', length=600, freq='S')
# One random integer in [0, 500) for every timestamp in rng.
s = pd.Series(
    np.random.randint(0, 500, size=len(rng)),
    index=rng,
)
s
2016-03-01 00:00:00    442
2016-03-01 00:00:01    435
2016-03-01 00:00:02    146
2016-03-01 00:00:03    213
2016-03-01 00:00:04    398
2016-03-01 00:00:05    275
2016-03-01 00:00:06    463
2016-03-01 00:00:07    338
2016-03-01 00:00:08     91
2016-03-01 00:00:09    252
2016-03-01 00:00:10    127
2016-03-01 00:00:11    203
2016-03-01 00:00:12    426
2016-03-01 00:00:13    142
2016-03-01 00:00:14    327
2016-03-01 00:00:15    458
2016-03-01 00:00:16    237
2016-03-01 00:00:17    474
2016-03-01 00:00:18    145
2016-03-01 00:00:19    159
2016-03-01 00:00:20    432
2016-03-01 00:00:21    494
2016-03-01 00:00:22    434
2016-03-01 00:00:23    107
2016-03-01 00:00:24    309
2016-03-01 00:00:25    486
2016-03-01 00:00:26    297
2016-03-01 00:00:27     97
2016-03-01 00:00:28    476
2016-03-01 00:00:29    163
                      ... 
2016-03-01 00:09:30    126
2016-03-01 00:09:31    424
2016-03-01 00:09:32    267
2016-03-01 00:09:33    340
2016-03-01 00:09:34    241
2016-03-01 00:09:35     62
2016-03-01 00:09:36    293
2016-03-01 00:09:37    311
2016-03-01 00:09:38     56
2016-03-01 00:09:39    224
2016-03-01 00:09:40    244
2016-03-01 00:09:41     28
2016-03-01 00:09:42    152
2016-03-01 00:09:43     97
2016-03-01 00:09:44    246
2016-03-01 00:09:45    391
2016-03-01 00:09:46    306
2016-03-01 00:09:47     12
2016-03-01 00:09:48    136
2016-03-01 00:09:49    266
2016-03-01 00:09:50    184
2016-03-01 00:09:51    308
2016-03-01 00:09:52    362
2016-03-01 00:09:53    393
2016-03-01 00:09:54    294
2016-03-01 00:09:55    152
2016-03-01 00:09:56    318
2016-03-01 00:09:57    368
2016-03-01 00:09:58    131
2016-03-01 00:09:59    131
Freq: S, dtype: int32
# Sum the per-second values into 2-minute bins.  The old `how="sum"` keyword
# form is what produced the FutureWarning captured below; calling the
# aggregation on the resampler object is the supported spelling.
s.resample("2Min").sum()
C:\Users\jxm\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...).sum()
  if __name__ == '__main__':





2016-03-01 00:00:00    32423
2016-03-01 00:02:00    31627
2016-03-01 00:04:00    27891
2016-03-01 00:06:00    31000
2016-03-01 00:08:00    30084
Freq: 2T, dtype: int32
# Quarterly periods from 2000Q1 through 2016Q1, both ends included.
rng = pd.period_range(start="2000Q1", end="2016Q1", freq="Q")
rng
PeriodIndex(['2000Q1', '2000Q2', '2000Q3', '2000Q4', '2001Q1', '2001Q2',
             '2001Q3', '2001Q4', '2002Q1', '2002Q2', '2002Q3', '2002Q4',
             '2003Q1', '2003Q2', '2003Q3', '2003Q4', '2004Q1', '2004Q2',
             '2004Q3', '2004Q4', '2005Q1', '2005Q2', '2005Q3', '2005Q4',
             '2006Q1', '2006Q2', '2006Q3', '2006Q4', '2007Q1', '2007Q2',
             '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3', '2008Q4',
             '2009Q1', '2009Q2', '2009Q3', '2009Q4', '2010Q1', '2010Q2',
             '2010Q3', '2010Q4', '2011Q1', '2011Q2', '2011Q3', '2011Q4',
             '2012Q1', '2012Q2', '2012Q3', '2012Q4', '2013Q1', '2013Q2',
             '2013Q3', '2013Q4', '2014Q1', '2014Q2', '2014Q3', '2014Q4',
             '2015Q1', '2015Q2', '2015Q3', '2015Q4', '2016Q1'],
            dtype='period[Q-DEC]', freq='Q-DEC')
rng.to_timestamp()
DatetimeIndex(['2000-01-01', '2000-04-01', '2000-07-01', '2000-10-01',
               '2001-01-01', '2001-04-01', '2001-07-01', '2001-10-01',
               '2002-01-01', '2002-04-01', '2002-07-01', '2002-10-01',
               '2003-01-01', '2003-04-01', '2003-07-01', '2003-10-01',
               '2004-01-01', '2004-04-01', '2004-07-01', '2004-10-01',
               '2005-01-01', '2005-04-01', '2005-07-01', '2005-10-01',
               '2006-01-01', '2006-04-01', '2006-07-01', '2006-10-01',
               '2007-01-01', '2007-04-01', '2007-07-01', '2007-10-01',
               '2008-01-01', '2008-04-01', '2008-07-01', '2008-10-01',
               '2009-01-01', '2009-04-01', '2009-07-01', '2009-10-01',
               '2010-01-01', '2010-04-01', '2010-07-01', '2010-10-01',
               '2011-01-01', '2011-04-01', '2011-07-01', '2011-10-01',
               '2012-01-01', '2012-04-01', '2012-07-01', '2012-10-01',
               '2013-01-01', '2013-04-01', '2013-07-01', '2013-10-01',
               '2014-01-01', '2014-04-01', '2014-07-01', '2014-10-01',
               '2015-01-01', '2015-04-01', '2015-07-01', '2015-10-01',
               '2016-01-01'],
              dtype='datetime64[ns]', freq='QS-OCT')
pd.Timestamp("20160301")-pd.Timestamp("20160201")
Timedelta('29 days 00:00:00')
# Six records, each with a numeric id and a letter grade.
df = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5, 6],
        "raw_grade": ["a", "b", "b", "a", "a", "d"],
    }
)
df
   id raw_grade
0   1         a
1   2         b
2   3         b
3   4         a
4   5         a
5   6         d
# Add a categorical copy of the raw letter grades.
df["grade"] = df["raw_grade"].astype("category")
df
   id raw_grade grade
0   1         a     a
1   2         b     b
2   3         b     b
3   4         a     a
4   5         a     a
5   6         d     d
df.grade
0    a
1    b
2    b
3    a
4    a
5    d
Name: grade, dtype: category
Categories (3, object): [a, b, d]
df.grade.cat.categories
Index(['a', 'b', 'd'], dtype='object')
# Relabel the categories positionally (a -> "very good", b -> "good",
# d -> "bad").  Assigning to `.cat.categories` is not supported by current
# pandas; `rename_categories` returns the relabelled Series, which is stored
# back into the column.
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "bad"])
df
   id raw_grade      grade
0   1         a  very good
1   2         b       good
2   3         b       good
3   4         a  very good
4   5         a  very good
5   6         d        bad
# Rows come out in category order (very good, good, bad), not alphabetically.
df.sort_values("grade")
   id raw_grade      grade
0   1         a  very good
3   4         a  very good
4   5         a  very good
1   2         b       good
2   3         b       good
5   6         d        bad
# 1000 standard-normal draws indexed by consecutive days from 2000-01-01.
s = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start="20000101", periods=1000),
)
s
2000-01-01    1.595426
2000-01-02    2.072600
2000-01-03    0.432756
2000-01-04    0.426376
2000-01-05   -0.491930
2000-01-06    0.201953
2000-01-07   -0.486838
2000-01-08    1.583397
2000-01-09    0.465889
2000-01-10    0.436978
2000-01-11   -0.597867
2000-01-12    1.402368
2000-01-13    1.066815
2000-01-14    1.070015
2000-01-15    0.413151
2000-01-16   -0.779842
2000-01-17    0.264469
2000-01-18   -0.842073
2000-01-19    0.797955
2000-01-20   -1.413759
2000-01-21    0.434063
2000-01-22   -1.448270
2000-01-23   -0.550196
2000-01-24    1.007455
2000-01-25    1.282003
2000-01-26    0.567412
2000-01-27    0.188337
2000-01-28   -0.989570
2000-01-29    0.348961
2000-01-30    0.660422
                ...   
2002-08-28    0.851649
2002-08-29    0.758766
2002-08-30    1.762871
2002-08-31    2.340199
2002-09-01   -1.040113
2002-09-02    1.759316
2002-09-03    0.676174
2002-09-04   -0.837802
2002-09-05    2.193853
2002-09-06   -0.508849
2002-09-07    2.170355
2002-09-08    0.153670
2002-09-09   -0.587198
2002-09-10    0.313317
2002-09-11    0.523073
2002-09-12   -1.119825
2002-09-13   -0.451295
2002-09-14   -0.389207
2002-09-15   -0.863233
2002-09-16   -0.564046
2002-09-17    0.849943
2002-09-18    0.201502
2002-09-19   -1.145370
2002-09-20    0.808674
2002-09-21   -1.190614
2002-09-22    0.259530
2002-09-23    0.973751
2002-09-24   -0.937229
2002-09-25    0.181833
2002-09-26    1.301469
Freq: D, dtype: float64
# Replace the series with its running total.
s = s.cumsum()
s
2000-01-01     1.595426
2000-01-02     3.668026
2000-01-03     4.100782
2000-01-04     4.527158
2000-01-05     4.035227
2000-01-06     4.237181
2000-01-07     3.750342
2000-01-08     5.333739
2000-01-09     5.799628
2000-01-10     6.236605
2000-01-11     5.638738
2000-01-12     7.041106
2000-01-13     8.107922
2000-01-14     9.177936
2000-01-15     9.591087
2000-01-16     8.811245
2000-01-17     9.075714
2000-01-18     8.233641
2000-01-19     9.031596
2000-01-20     7.617836
2000-01-21     8.051899
2000-01-22     6.603629
2000-01-23     6.053433
2000-01-24     7.060888
2000-01-25     8.342891
2000-01-26     8.910303
2000-01-27     9.098640
2000-01-28     8.109071
2000-01-29     8.458032
2000-01-30     9.118453
                ...    
2002-08-28    83.281560
2002-08-29    84.040326
2002-08-30    85.803197
2002-08-31    88.143396
2002-09-01    87.103282
2002-09-02    88.862598
2002-09-03    89.538772
2002-09-04    88.700970
2002-09-05    90.894822
2002-09-06    90.385973
2002-09-07    92.556328
2002-09-08    92.709998
2002-09-09    92.122800
2002-09-10    92.436118
2002-09-11    92.959191
2002-09-12    91.839366
2002-09-13    91.388071
2002-09-14    90.998864
2002-09-15    90.135631
2002-09-16    89.571585
2002-09-17    90.421528
2002-09-18    90.623030
2002-09-19    89.477660
2002-09-20    90.286334
2002-09-21    89.095720
2002-09-22    89.355250
2002-09-23    90.329001
2002-09-24    89.391773
2002-09-25    89.573606
2002-09-26    90.875075
Freq: D, dtype: float64
# Plot the cumulative series (shown inline via the %matplotlib magic at the top).
s.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x2a5483a9550>

![png](output_25_1.png)

# 100x4 frame of standard-normal draws with columns A through D.
df = pd.DataFrame(
    data=np.random.randn(100, 4),
    columns=["A", "B", "C", "D"],
)
df
ABCD
00.125103-0.047520-0.2403992.501893
1-1.0194311.333626-0.3867581.227110
2-1.1877720.2776021.039957-0.324204
3-0.988399-0.3828320.1960231.422645
40.1628660.284484-0.481481-0.064925
5-0.2529820.101673-0.363448-0.577417
6-1.329524-0.0617702.485569-0.148556
7-0.3237601.8936060.2914761.205642
8-1.099546-0.4022162.2545531.036753
9-0.483892-0.0650151.644008-1.079580
100.120246-1.068981-1.976976-0.220611
11-0.370101-2.034787-0.1307441.085791
121.631983-0.3018521.144025-0.331214
13-1.503589-0.5825840.0022991.436808
14-0.1619540.6974040.8389181.382634
15-0.0058430.956195-1.4088160.857727
16-1.3753630.7785551.075712-0.476712
171.565101-0.0514211.2590820.599417
18-0.793296-0.431271-1.0746180.221511
190.574572-0.3305260.8223012.243612
20-0.218847-0.2008720.5590550.444604
210.0793540.490181-0.971043-1.298320
22-0.3398980.974481-1.562591-0.120771
231.0397221.466950-0.1340921.798354
240.1991160.2141730.1356940.407160
250.021932-0.880740-1.832740-1.670625
26-0.0130081.1951360.694926-0.025821
271.0742900.0583800.280012-0.473543
28-0.5044021.063999-1.866300-0.731570
29-0.4688620.295581-0.3490990.270034
...............
70-0.1029831.2733620.601778-1.255539
711.0592080.095890-0.6669901.353239
720.330271-1.465155-0.454793-0.645915
730.6125900.857497-0.8111330.854238
74-0.5341250.407952-0.242628-0.442530
750.1035920.827345-0.1243011.002852
760.013706-0.335187-0.972141-2.414027
770.742050-1.079103-1.3557770.459921
782.209496-0.5254111.130483-1.104203
790.432017-0.613601-0.8895160.226079
801.155052-0.220674-1.0520160.599290
81-0.9873671.030958-0.056030-0.951313
82-0.464277-0.8029860.1763420.935201
830.0421710.6739350.8715070.112901
84-1.176412-1.1484880.983801-3.525204
85-0.3964311.7911142.092050-1.113374
861.4383620.2165730.054065-0.426881
87-1.161840-2.1667391.3813521.749754
881.738967-1.3155160.531161-1.312078
890.0936130.360908-0.760932-0.527739
90-0.3594091.291480-0.253727-0.209233
91-0.823306-0.1361201.0419660.540554
920.784981-0.388069-0.683943-1.201355
931.523748-0.0200330.9098271.477140
94-0.8420342.1384601.9772220.337255
952.6900391.219984-2.1230121.098065
960.1193401.097970-0.146863-0.866127
97-1.4745100.4910750.710219-0.559791
98-0.0053960.5791490.3880450.458575
99-1.2777942.154093-0.642422-0.728245

100 rows × 4 columns

# Save the frame as data.csv (relative path, so it lands in the working directory).
df.to_csv("data.csv")
%ls
 驱动器 C 中的卷是 Windows
 卷的序列号是 9C4B-695D

 C:\Users\jxm 的目录

2019/11/17  11:22    <DIR>          .
2019/11/17  11:22    <DIR>          ..
2019/10/24  13:03    <DIR>          .anaconda
2019/06/24  21:03    <DIR>          .android
2019/10/24  14:03    <DIR>          .astropy
2019/10/24  20:37                66 .condarc
2019/10/24  13:04    <DIR>          .continuum
2019/11/17  10:25    <DIR>          .ipynb_checkpoints
2019/11/16  19:52    <DIR>          .ipython
2019/11/16  20:12    <DIR>          .jupyter
2019/11/17  10:27    <DIR>          .matplotlib
2019/10/21  14:46    <DIR>          .oracle_jre_usage
2019/10/21  17:18    <DIR>          .PyCharmCE2019.2
2019/11/13  22:24    <DIR>          3D Objects
2019/11/16  19:50    <DIR>          Anaconda3
2019/05/07  15:03    <DIR>          AppData
2019/11/13  22:24    <DIR>          Contacts
2019/11/17  11:22             8,254 data.csv
2019/11/16  20:33               581 demo_1.ipynb
2019/11/15  13:22    <DIR>          Desktop
2019/11/13  22:24    <DIR>          Documents
2019/11/13  22:24    <DIR>          Downloads
2019/11/14  19:28    <DIR>          Favorites
2019/11/13  22:24    <DIR>          Links
2019/11/13  22:24    <DIR>          Music
2019/11/16  09:47    <DIR>          OneDrive
2019/11/17  10:20            59,291 pandas__11.ipynb
2019/11/17  11:21            60,369 pandas_1117.ipynb
2019/11/13  22:24    <DIR>          Pictures
2019/10/21  20:04    <DIR>          PycharmProjects
2019/04/02  14:48    <DIR>          Roaming
2019/11/13  22:24    <DIR>          Saved Games
2019/11/13  22:24    <DIR>          Searches
2019/06/22  20:08                 1 status.bin
2019/11/16  20:35               581 Untitled.ipynb
2019/11/13  22:24    <DIR>          Videos
               7 个文件        129,143 字节
              29 个目录 41,854,242,816 可用字节
%more data.csv
# Load the file back, using its first column as the row index.
pd.read_csv("data.csv",index_col=0)
ABCD
00.125103-0.047520-0.2403992.501893
1-1.0194311.333626-0.3867581.227110
2-1.1877720.2776021.039957-0.324204
3-0.988399-0.3828320.1960231.422645
40.1628660.284484-0.481481-0.064925
5-0.2529820.101673-0.363448-0.577417
6-1.329524-0.0617702.485569-0.148556
7-0.3237601.8936060.2914761.205642
8-1.099546-0.4022162.2545531.036753
9-0.483892-0.0650151.644008-1.079580
100.120246-1.068981-1.976976-0.220611
11-0.370101-2.034787-0.1307441.085791
121.631983-0.3018521.144025-0.331214
13-1.503589-0.5825840.0022991.436808
14-0.1619540.6974040.8389181.382634
15-0.0058430.956195-1.4088160.857727
16-1.3753630.7785551.075712-0.476712
171.565101-0.0514211.2590820.599417
18-0.793296-0.431271-1.0746180.221511
190.574572-0.3305260.8223012.243612
20-0.218847-0.2008720.5590550.444604
210.0793540.490181-0.971043-1.298320
22-0.3398980.974481-1.562591-0.120771
231.0397221.466950-0.1340921.798354
240.1991160.2141730.1356940.407160
250.021932-0.880740-1.832740-1.670625
26-0.0130081.1951360.694926-0.025821
271.0742900.0583800.280012-0.473543
28-0.5044021.063999-1.866300-0.731570
29-0.4688620.295581-0.3490990.270034
...............
70-0.1029831.2733620.601778-1.255539
711.0592080.095890-0.6669901.353239
720.330271-1.465155-0.454793-0.645915
730.6125900.857497-0.8111330.854238
74-0.5341250.407952-0.242628-0.442530
750.1035920.827345-0.1243011.002852
760.013706-0.335187-0.972141-2.414027
770.742050-1.079103-1.3557770.459921
782.209496-0.5254111.130483-1.104203
790.432017-0.613601-0.8895160.226079
801.155052-0.220674-1.0520160.599290
81-0.9873671.030958-0.056030-0.951313
82-0.464277-0.8029860.1763420.935201
830.0421710.6739350.8715070.112901
84-1.176412-1.1484880.983801-3.525204
85-0.3964311.7911142.092050-1.113374
861.4383620.2165730.054065-0.426881
87-1.161840-2.1667391.3813521.749754
881.738967-1.3155160.531161-1.312078
890.0936130.360908-0.760932-0.527739
90-0.3594091.291480-0.253727-0.209233
91-0.823306-0.1361201.0419660.540554
920.784981-0.388069-0.683943-1.201355
931.523748-0.0200330.9098271.477140
94-0.8420342.1384601.9772220.337255
952.6900391.219984-2.1230121.098065
960.1193401.097970-0.146863-0.866127
97-1.4745100.4910750.710219-0.559791
98-0.0053960.5791490.3880450.458575
99-1.2777942.154093-0.642422-0.728245

100 rows × 4 columns

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值