Pandas 总结试卷
import pandas as pd
# Create an empty DataFrame (no rows, no columns) and display it.
df = pd. DataFrame( )
df
'''
姓名 年龄 成绩
张三 18 85
李四 19 90
王五 20 78
赵六 21 92
'''
# The string above is the target table (name, age, score).
# Describe it row by row, then pivot the rows into the column-oriented
# dict that pd.DataFrame consumes.
header = ('姓名', '年龄', '成绩')
rows = [
    ('张三', 18, 85),
    ('李四', 19, 90),
    ('王五', 20, 78),
    ('赵六', 21, 92),
]
data = {col: list(values) for col, values in zip(header, zip(*rows))}
df = pd.DataFrame(data)
df
姓名 年龄 成绩 0 张三 18 85 1 李四 19 90 2 王五 20 78 3 赵六 21 92
# Print the first three rows of df, then the last two rows.
print ( df. head( 3 ) )
print ( df. tail( 2 ) )
姓名 年龄 成绩
0 张三 18 85
1 李四 19 90
2 王五 20 78
姓名 年龄 成绩
2 王五 20 78
3 赵六 21 92
# Load data1.csv, treating it as tab-separated, and display the frame.
df1 = pd. read_csv( 'data1.csv' , sep= '\t' )
df1
order_id quantity item_name choice_description item_price 0 1 1 Chips and Fresh Tomato Salsa NaN $2.39 1 1 1 Izze [Clementine] $3.39 2 1 1 Nantucket Nectar [Apple] $3.39 3 1 1 Chips and Tomatillo-Green Chili Salsa NaN $2.39 4 2 2 Chicken Bowl [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98 ... ... ... ... ... ... 4617 1833 1 Steak Burrito [Fresh Tomato Salsa, [Rice, Black Beans, Sour ... $11.75 4618 1833 1 Steak Burrito [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese... $11.75 4619 1834 1 Chicken Salad Bowl [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... $11.25 4620 1834 1 Chicken Salad Bowl [Fresh Tomato Salsa, [Fajita Vegetables, Lettu... $8.75 4621 1834 1 Chicken Salad Bowl [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... $8.75
4622 rows × 5 columns
# Write df1 to an Excel workbook; index=False omits the row-index column.
df1. to_excel( 'data1.xlsx' , index= False )
# Series holding the integers 1 through 10 on the default 0..9 index.
one_to_ten = list(range(1, 11))
s = pd.Series(one_to_ten)
s
0 1
1 2
2 3
3 4
4 5
5 6
6 7
7 8
8 9
9 10
dtype: int64
# View the frame's contents as a NumPy array (one inner array per row).
df. values
array([['张三', 18, 85],
['李四', 19, 90],
['王五', 20, 78],
['赵六', 21, 92]], dtype=object)
# Convert the frame to a nested dict shaped {column: {index_label: value}}.
df. to_dict( )
{'姓名': {0: '张三', 1: '李四', 2: '王五', 3: '赵六'},
'年龄': {0: 18, 1: 19, 2: 20, 3: 21},
'成绩': {0: 85, 1: 90, 2: 78, 3: 92}}
# First pass: load Sheet2 as-is to inspect its raw layout.
df2 = pd.read_excel('data2.xlsx', sheet_name='Sheet2')
df2

# Second pass: skip the first three rows and take the next row as the header.
df2 = pd.read_excel('data2.xlsx', sheet_name='Sheet2', skiprows=3, header=0)

# Replace the header with the labels A1 .. A60 (the assignment requires the
# sheet to have exactly 60 columns).
df2.columns = [f'A{number}' for number in range(1, 61)]
df2.head(1)

# Convert the two date columns: A16 is parsed with format inference,
# A22 is parsed as compact YYYYMMDD values.
a16_parsed = pd.to_datetime(df2['A16'])
df2['A16'] = a16_parsed
a22_parsed = pd.to_datetime(df2['A22'], format='%Y%m%d')
df2['A22'] = a22_parsed
print(df2['A16'])
print(df2['A22'])
0 2016-01-01
1 2016-01-01
2 2022-10-26
3 2017-01-01
4 2016-01-01
...
23946 2016-01-01
23947 2020-07-01
23948 2018-01-01
23949 2016-01-01
23950 2016-01-01
Name: A16, Length: 23951, dtype: datetime64[ns]
0 2023-12-17
1 2023-12-17
2 2023-12-17
3 2023-12-17
4 2023-12-17
...
23946 2023-12-17
23947 2023-12-17
23948 2023-12-17
23949 2023-12-17
23950 2023-12-17
Name: A22, Length: 23951, dtype: datetime64[ns]
from sklearn import datasets
# Load the scikit-learn iris bunch and turn it into a DataFrame:
# one column per feature name plus a trailing 'target' column with the labels.
iris_ = datasets.load_iris()
iris = pd.DataFrame(iris_.data, columns=iris_.feature_names).assign(
    target=iris_.target
)
iris
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target 0 5.1 3.5 1.4 0.2 0 1 4.9 3.0 1.4 0.2 0 2 4.7 3.2 1.3 0.2 0 3 4.6 3.1 1.5 0.2 0 4 5.0 3.6 1.4 0.2 0 ... ... ... ... ... ... 145 6.7 3.0 5.2 2.3 2 146 6.3 2.5 5.0 1.9 2 147 6.5 3.0 5.2 2.0 2 148 6.2 3.4 5.4 2.3 2 149 5.9 3.0 5.1 1.8 2
150 rows × 5 columns
# Print a structural summary: index range, per-column non-null counts and
# dtypes, and memory usage.
iris. info( )
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 sepal length (cm) 150 non-null float64
1 sepal width (cm) 150 non-null float64
2 petal length (cm) 150 non-null float64
3 petal width (cm) 150 non-null float64
4 target 150 non-null int32
dtypes: float64(4), int32(1)
memory usage: 5.4 KB
# Summary statistics per column: count, mean, std, min, quartiles, max.
iris. describe( )
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target count 150.000000 150.000000 150.000000 150.000000 150.000000 mean 5.843333 3.057333 3.758000 1.199333 1.000000 std 0.828066 0.435866 1.765298 0.762238 0.819232 min 4.300000 2.000000 1.000000 0.100000 0.000000 25% 5.100000 2.800000 1.600000 0.300000 0.000000 50% 5.800000 3.000000 4.350000 1.300000 1.000000 75% 6.400000 3.300000 5.100000 1.800000 2.000000 max 7.900000 4.400000 6.900000 2.500000 2.000000
# Keep only the three quartile rows of the describe() table, all columns.
iris. describe( ) . loc[ [ '25%' , '50%' , '75%' ] , : ]
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target 25% 5.1 2.8 1.60 0.3 0.0 50% 5.8 3.0 4.35 1.3 1.0 75% 6.4 3.3 5.10 1.8 2.0
# Compute the 25th, 50th and 75th percentiles of every column directly.
iris. quantile( [ 0.25 , 0.5 , 0.75 ] )
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target 0.25 5.1 2.8 1.60 0.3 0.0 0.50 5.8 3.0 4.35 1.3 1.0 0.75 6.4 3.3 5.10 1.8 2.0
# Count the missing values in each column.
iris. isnull( ) . sum ( )
sepal length (cm) 0
sepal width (cm) 0
petal length (cm) 0
petal width (cm) 0
target 0
dtype: int64
# Rebuild iris from the scikit-learn bunch so the NaN injection below starts
# from a clean frame.
iris_ = datasets.load_iris()
iris = pd.DataFrame(data=iris_.data, columns=iris_.feature_names)
iris['target'] = iris_.target
import numpy as np

# For each of the first four columns (the features), pick two distinct row
# labels at random and blank those cells out. The selection is unseeded, so
# the affected rows differ from run to run.
for column in iris.columns[:4]:
    random_indices = np.random.choice(iris.index, 2, replace=False)
    iris.loc[random_indices, column] = np.nan

# Show every row that now contains at least one missing value.
iris[iris.isnull().any(axis=1)]
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target 7 5.0 3.4 1.5 NaN 0 8 NaN 2.9 1.4 0.2 0 39 5.1 3.4 NaN 0.2 0 71 6.1 2.8 4.0 NaN 1 75 6.6 NaN 4.4 1.4 1 86 6.7 3.1 NaN 1.5 1 115 6.4 NaN 5.3 2.3 2 137 NaN 3.1 5.5 1.8 2
# Fill each missing cell with the median of its column within the same
# 'target' group, then list any rows that still contain a NaN.
iris = iris. fillna( iris. groupby( 'target' ) . transform( 'median' ) )
iris[ iris. isnull( ) . any ( axis= 1 ) ]
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
# Count how many rows belong to each target class.
iris[ 'target' ] . value_counts( )
target
0 50
1 50
2 50
Name: count, dtype: int64
# Remove the row labelled 0 in place, then recount the rows per class.
iris. drop( 0 , axis= 0 , inplace= True )
iris[ 'target' ] . value_counts( )
target
1 50
2 50
0 49
Name: count, dtype: int64
# Relabel the five columns with Chinese names, in order: sepal length,
# sepal width, petal length, petal width, class.
iris. columns = [ '花萼长度' , '花萼宽度' , '花瓣长度' , '花瓣宽度' , '类别' ]
iris
花萼长度 花萼宽度 花瓣长度 花瓣宽度 类别 1 4.9 3.0 1.4 0.2 0 2 4.7 3.2 1.3 0.2 0 3 4.6 3.1 1.5 0.2 0 4 5.0 3.6 1.4 0.2 0 5 5.4 3.9 1.7 0.4 0 ... ... ... ... ... ... 145 6.7 3.0 5.2 2.3 2 146 6.3 2.5 5.0 1.9 2 147 6.5 3.0 5.2 2.0 2 148 6.2 3.4 5.4 2.3 2 149 5.9 3.0 5.1 1.8 2
149 rows × 5 columns
# Correlation coefficient between sepal length and sepal width.
iris[ '花萼长度' ] . corr( iris[ '花萼宽度' ] )
-0.11924860426893708
# Rows whose sepal length is greater than 7.
iris[ iris[ '花萼长度' ] > 7 ]
花萼长度 花萼宽度 花瓣长度 花瓣宽度 类别 102 7.1 3.0 5.9 2.1 2 105 7.6 3.0 6.6 2.1 2 107 7.3 2.9 6.3 1.8 2 109 7.2 3.6 6.1 2.5 2 117 7.7 3.8 6.7 2.2 2 118 7.7 2.6 6.9 2.3 2 122 7.7 2.8 6.7 2.0 2 125 7.2 3.2 6.0 1.8 2 129 7.2 3.0 5.8 1.6 2 130 7.4 2.8 6.1 1.9 2 131 7.9 3.8 6.4 2.0 2 135 7.7 3.0 6.1 2.3 2
# Rows with sepal length greater than 7 AND sepal width less than 3.
# Each comparison is parenthesised because & binds tighter than > and <.
iris[ ( iris[ '花萼长度' ] > 7 ) & ( iris[ '花萼宽度' ] < 3 ) ]
花萼长度 花萼宽度 花瓣长度 花瓣宽度 类别 107 7.3 2.9 6.3 1.8 2 118 7.7 2.6 6.9 2.3 2 122 7.7 2.8 6.7 2.0 2 130 7.4 2.8 6.1 1.9 2
# Positional slice: the rows at positions 2, 3 and 4, with all columns.
iris. iloc[ 2 : 5 , : ]
花萼长度 花萼宽度 花瓣长度 花瓣宽度 类别 3 4.6 3.1 1.5 0.2 0 4 5.0 3.6 1.4 0.2 0 5 5.4 3.9 1.7 0.4 0
# Same positional rows (2, 3, 4), restricted to the first two columns.
iris. iloc[ 2 : 5 , [ 0 , 1 ] ]
花萼长度 花萼宽度 3 4.6 3.1 4 5.0 3.6 5 5.4 3.9
# First four rows of class 2.
iris[ iris[ '类别' ] == 2 ] . head( 4 )
花萼长度 花萼宽度 花瓣长度 花瓣宽度 类别 100 6.3 3.3 6.0 2.5 2 101 5.8 2.7 5.1 1.9 2 102 7.1 3.0 5.9 2.1 2 103 6.3 2.9 5.6 1.8 2
# Within class 1, the rows at positions 2, 3 and 4 of the filtered frame.
iris[ iris[ '类别' ] == 1 ] . iloc[ 2 : 5 , : ]
花萼长度 花萼宽度 花瓣长度 花瓣宽度 类别 52 6.9 3.1 4.9 1.5 1 53 5.5 2.3 4.0 1.3 1 54 6.5 2.8 4.6 1.5 1
# Rows with sepal length greater than 7, ordered by petal length, largest first.
iris[ iris[ '花萼长度' ] > 7 ] . sort_values( by= '花瓣长度' , ascending= False )
花萼长度 花萼宽度 花瓣长度 花瓣宽度 类别 118 7.7 2.6 6.9 2.3 2 117 7.7 3.8 6.7 2.2 2 122 7.7 2.8 6.7 2.0 2 105 7.6 3.0 6.6 2.1 2 131 7.9 3.8 6.4 2.0 2 107 7.3 2.9 6.3 1.8 2 109 7.2 3.6 6.1 2.5 2 130 7.4 2.8 6.1 1.9 2 135 7.7 3.0 6.1 2.3 2 125 7.2 3.2 6.0 1.8 2 102 7.1 3.0 5.9 2.1 2 129 7.2 3.0 5.8 1.6 2
# Sort the whole frame by sepal length, largest first, and renumber the index
# from 0; drop=True discards the old labels instead of keeping them as a column.
iris. sort_values( by= '花萼长度' , ascending= False ) . reset_index( drop= True )
花萼长度 花萼宽度 花瓣长度 花瓣宽度 类别 0 7.9 3.8 6.4 2.0 2 1 7.7 3.0 6.1 2.3 2 2 7.7 2.8 6.7 2.0 2 3 7.7 3.8 6.7 2.2 2 4 7.7 2.6 6.9 2.3 2 ... ... ... ... ... ... 144 4.6 3.6 1.0 0.2 0 145 4.5 2.3 1.3 0.3 0 146 4.4 3.0 1.3 0.2 0 147 4.4 3.2 1.3 0.2 0 148 4.3 3.0 1.1 0.1 0
149 rows × 5 columns
# Demo frame for the grouping exercises: two label columns (A, B) followed by
# four integer columns (C..F). It is written one record per line and handed to
# pandas together with the column order.
records = [
    ('foo', 'one', 1, 2, 1, 2),
    ('bar', 'one', 3, 4, 2, 3),
    ('foo', 'two', 2, 5, 3, 4),
    ('bar', 'three', 5, 5, 4, 1),
    ('foo', 'two', 4, 1, 5, 2),
    ('bar', 'two', 1, 2, 6, 3),
    ('foo', 'one', 2, 4, 7, 4),
    ('foo', 'three', 3, 4, 8, 4),
]
df = pd.DataFrame(records, columns=['A', 'B', 'C', 'D', 'E', 'F'])
df
A B C D E F 0 foo one 1 2 1 2 1 bar one 3 4 2 3 2 foo two 2 5 3 4 3 bar three 5 5 4 1 4 foo two 4 1 5 2 5 bar two 1 2 6 3 6 foo one 2 4 7 4 7 foo three 3 4 8 4
# Mean of D for every (A, C) combination; the result is indexed by (A, C).
df. groupby( [ 'A' , 'C' ] ) [ 'D' ] . mean( )
A C
bar 1 2.0
3 4.0
5 5.0
foo 1 2.0
2 4.5
3 4.0
4 1.0
Name: D, dtype: float64
# Several statistics of C per group of A, one output column per statistic.
df. groupby( 'A' ) [ 'C' ] . agg( [ 'mean' , 'max' , 'min' , 'std' , 'var' ] )
mean max min std var A bar 3.0 5 1 2.000000 4.0 foo 2.4 4 1 1.140175 1.3
# Same per-group statistics of C, with the result columns relabelled in
# Chinese: mean, maximum, minimum, standard deviation, variance.
df. groupby( 'A' ) [ 'C' ] . agg( [ 'mean' , 'max' , 'min' , 'std' , 'var' ] ) \
. rename( columns= { 'mean' : '均值' ,
'max' : '最大值' ,
'min' : '最小值' ,
'std' : '标准差' ,
'var' : '方差' } )
均值 最大值 最小值 标准差 方差 A bar 3.0 5 1 2.000000 4.0 foo 2.4 4 1 1.140175 1.3
# Per group of A: the sum of C and the mean of D (a different aggregate per column).
df. groupby( 'A' ) . agg( { 'C' : 'sum' , 'D' : 'mean' } )
C D A bar 9 3.666667 foo 12 3.200000
# Apply a function to the C values of each A group; here every value is doubled.
df. groupby( [ 'A' ] ) [ 'C' ] . apply ( lambda x: x * 2 )
A
bar 1 6
3 10
5 2
foo 0 2
2 4
4 8
6 4
7 6
Name: C, dtype: int64
# Replace the words in column B with their integer values via a lookup dict.
df[ 'B' ] = df[ 'B' ] . map ( { 'one' : 1 , 'two' : 2 , 'three' : 3 } )
df
A B C D E F 0 foo 1 1 2 1 2 1 bar 1 3 4 2 3 2 foo 2 2 5 3 4 3 bar 3 5 5 4 1 4 foo 2 4 1 5 2 5 bar 2 1 2 6 3 6 foo 1 2 4 7 4 7 foo 3 3 4 8 4
# Rewrite the labels in column A with two chained string replacements:
# 'foo' becomes 'fooo', then 'bar' becomes 'rbar'.
df[ 'A' ] = df[ 'A' ] . str . replace( 'foo' , 'fooo' ) . str . replace( 'bar' , 'rbar' )
df
A B C D E F 0 fooo 1 1 2 1 2 1 rbar 1 3 4 2 3 2 fooo 2 2 5 3 4 3 rbar 3 5 5 4 1 4 fooo 2 4 1 5 2 5 rbar 2 1 2 6 3 6 fooo 1 2 4 7 4 7 fooo 3 3 4 8 4
# Two small tables sharing an 'id' key, used by the merge/concat examples.
# df1 has ids 001-004; df2 has ids 001-005, so id 005 exists only in df2.
df1 = pd.DataFrame(
    [
        ('001', 'Alice', 'A'),
        ('002', 'Bob', 'B'),
        ('003', 'Charlie', 'A'),
        ('004', 'David', 'B'),
    ],
    columns=['id', 'name', 'type'],
)
df2 = pd.DataFrame(
    [
        ('001', '123456789', 100),
        ('002', '987654321', 200),
        ('003', '111111111', 300),
        ('004', '222222222', 400),
        ('005', '333333333', 500),
    ],
    columns=['id', 'phone', 'bill'],
)
print(df1)
print(df2)
id name type
0 001 Alice A
1 002 Bob B
2 003 Charlie A
3 004 David B
id phone bill
0 001 123456789 100
1 002 987654321 200
2 003 111111111 300
3 004 222222222 400
4 005 333333333 500
# Inner join on 'id': keep only ids present in both df1 and df2.
df1. merge( df2, on= 'id' , how= 'inner' )
id name type phone bill 0 001 Alice A 123456789 100 1 002 Bob B 987654321 200 2 003 Charlie A 111111111 300 3 004 David B 222222222 400
# Left join on 'id': keep every row of df1 and attach matching df2 columns.
df1. merge( df2, on= 'id' , how= 'left' )
id name type phone bill 0 001 Alice A 123456789 100 1 002 Bob B 987654321 200 2 003 Charlie A 111111111 300 3 004 David B 222222222 400
# Right join on 'id': keep every row of df2; ids missing from df1 get NaN
# in the df1 columns.
df1. merge( df2, on= 'id' , how= 'right' )
id name type phone bill 0 001 Alice A 123456789 100 1 002 Bob B 987654321 200 2 003 Charlie A 111111111 300 3 004 David B 222222222 400 4 005 NaN NaN 333333333 500
# Outer join on 'id': keep the ids of both frames, filling the gaps with NaN.
df1. merge( df2, on= 'id' , how= 'outer' )
id name type phone bill 0 001 Alice A 123456789 100 1 002 Bob B 987654321 200 2 003 Charlie A 111111111 300 3 004 David B 222222222 400 4 005 NaN NaN 333333333 500
# Stack df2 below df1 (axis=0); columns missing from one frame become NaN
# and the original row labels are kept.
pd. concat( [ df1, df2] , axis= 0 )
id name type phone bill 0 001 Alice A NaN NaN 1 002 Bob B NaN NaN 2 003 Charlie A NaN NaN 3 004 David B NaN NaN 0 001 NaN NaN 123456789 100.0 1 002 NaN NaN 987654321 200.0 2 003 NaN NaN 111111111 300.0 3 004 NaN NaN 222222222 400.0 4 005 NaN NaN 333333333 500.0
# Place df2 to the right of df1 (axis=1), aligning rows by index label;
# both 'id' columns are kept.
pd. concat( [ df1, df2] , axis= 1 )
id name type id phone bill 0 001 Alice A 001 123456789 100 1 002 Bob B 002 987654321 200 2 003 Charlie A 003 111111111 300 3 004 David B 004 222222222 400 4 NaN NaN NaN 005 333333333 500
# Ten consecutive daily timestamps starting at 2021-01-01.
pd. date_range( start= '2021-01-01' , periods= 10 )
DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
'2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
'2021-01-09', '2021-01-10'],
dtype='datetime64[ns]', freq='D')
# Load data3.csv with the default comma separator and display it.
df3 = pd. read_csv( 'data3.csv' )
df3
Date Open High Low Close Volume Adj Close 0 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35 1 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97 2 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03 3 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48 4 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52 ... ... ... ... ... ... ... ... 8460 1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41 8461 1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40 8462 1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39 8463 1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42 8464 1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45
8465 rows × 7 columns
# Inspect the dtype of each column.
df3. dtypes
Date object
Open float64
High float64
Low float64
Close float64
Volume int64
Adj Close float64
dtype: object
# Convert the 'Date' column to datetime values.
df3[ 'Date' ] = pd. to_datetime( df3[ 'Date' ] )
df3
Date Open High Low Close Volume Adj Close 0 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35 1 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97 2 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03 3 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48 4 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52 ... ... ... ... ... ... ... ... 8460 1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41 8461 1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40 8462 1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39 8463 1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42 8464 1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45
8465 rows × 7 columns
# Move 'Date' from a column to the index, in place; the resample calls
# that follow group on this index.
df3. set_index( 'Date' , inplace= True )
df3
Open High Low Close Volume Adj Close Date 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52 ... ... ... ... ... ... ... 1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41 1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40 1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39 1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42 1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45
8465 rows × 6 columns
# Monthly totals: bin the rows by calendar month (labelled with the month-end
# date) and sum every column. 'ME' is the month-end alias that replaces the
# deprecated 'M', which raises a FutureWarning on pandas >= 2.2.
df3.resample('ME').sum()
C:\Users\ksufe\AppData\Local\Temp\ipykernel_10340\3032457932.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.
df3.resample('M').sum()
Open High Low Close Volume Adj Close Date 1980-12-31 396.26 397.38 395.76 395.76 336212800 6.15 1981-01-31 666.85 668.36 664.75 664.75 152247200 10.37 1981-02-28 503.12 504.87 501.75 501.75 80404800 7.81 1981-03-31 548.63 550.37 546.40 546.40 175179200 8.53 1981-04-30 573.02 574.73 571.77 571.77 134232000 8.89 ... ... ... ... ... ... ... 2014-03-31 11205.46 11265.53 11131.49 11197.50 1250424700 1590.75 2014-04-30 11341.72 11431.33 11261.51 11362.56 1608765200 1614.21 2014-05-31 12627.34 12733.83 12564.99 12667.11 1433917100 1807.23 2014-06-30 4669.56 4705.77 4635.45 4675.82 1206556300 1929.60 2014-07-31 471.47 475.02 467.44 472.35 210918300 472.35
404 rows × 6 columns
# Yearly averages: bin the rows by calendar year (labelled with the year-end
# date) and take the mean of every column. 'YE' is the year-end alias that
# replaces the deprecated 'Y', which raises a FutureWarning on pandas >= 2.2.
df3.resample('YE').mean()
C:\Users\ksufe\AppData\Local\Temp\ipykernel_10340\2278679952.py:2: FutureWarning: 'Y' is deprecated and will be removed in a future version, please use 'YE' instead.
df3.resample('Y').mean()
Open High Low Close Volume Adj Close Date 1980-12-31 30.481538 30.567692 30.443077 30.443077 2.586252e+07 0.473077 1981-12-31 24.386349 24.471865 24.311151 24.311151 8.131889e+06 0.378651 1982-12-31 19.139723 19.412688 18.957036 19.142727 2.111167e+07 0.298261 1983-12-31 37.524841 38.376071 36.669841 37.521984 4.134987e+07 0.584643 1984-12-31 26.869960 27.393755 26.351581 26.801897 4.148126e+07 0.417787 1985-12-31 20.378814 20.595178 20.128656 20.194941 4.495383e+07 0.314862 1986-12-31 32.387391 32.938498 31.853676 32.460672 5.269093e+07 0.505494 1987-12-31 53.822688 55.036443 52.694585 53.889526 5.906256e+07 1.215652 1988-12-31 41.555889 42.186364 40.890356 41.538893 4.080334e+07 1.305771 1989-12-31 41.615000 42.300238 40.978611 41.658571 5.050181e+07 1.322341 1990-12-31 37.502016 38.219486 36.817233 37.561937 4.387544e+07 1.205257 1991-12-31 52.451542 53.425534 51.506877 52.494545 5.666764e+07 1.702292 1992-12-31 54.803661 55.602047 53.965000 54.802835 4.049007e+07 1.792795 1993-12-31 41.063241 41.778300 40.284783 41.026601 5.578353e+07 1.354664 1994-12-31 34.052222 34.711548 33.412897 34.080317 5.670228e+07 1.142738 1995-12-31 40.623056 41.267024 39.908413 40.540476 7.367712e+07 1.375556 1996-12-31 25.048110 25.421378 24.504803 24.919409 5.235652e+07 0.850709 1997-12-31 18.032372 18.360237 17.628972 17.966403 7.111004e+07 0.613123 1998-12-31 30.512381 31.265556 29.776627 30.564603 1.142800e+08 1.043413 1999-12-31 57.659484 59.099881 56.300992 57.770278 1.360146e+08 1.972063 2000-12-31 71.863889 74.191230 69.609563 71.749206 1.193468e+08 3.120873 2001-12-31 20.165323 20.766290 19.622379 20.219355 9.542117e+07 1.380484 2002-12-31 19.128056 19.522063 18.716270 19.139444 7.640271e+07 1.306825 2003-12-31 18.521786 18.843492 18.206984 18.544762 7.066493e+07 1.265992 2004-12-31 35.421468 36.029444 34.924643 35.526944 1.208350e+08 2.425635 2005-12-31 52.349683 53.111230 51.588214 52.401746 1.809534e+08 6.373651 2006-12-31 70.987610 71.939124 69.810359 70.810637 
2.148396e+08 9.668964 2007-12-31 128.389084 130.070478 126.184502 128.273904 2.460119e+08 17.515219 2008-12-31 142.313755 145.110672 138.857708 141.979012 2.825901e+08 19.386324 2009-12-31 146.619087 148.495675 144.964881 146.814127 1.421168e+08 20.047063 2010-12-31 259.957619 262.368810 256.847619 259.842460 1.498263e+08 35.479802 2011-12-31 364.061429 367.423571 360.297698 364.004325 1.230747e+08 49.703135 2012-12-31 576.652720 581.825400 569.921160 576.049720 1.319642e+08 78.847600 2013-12-31 473.128135 477.638929 468.247103 472.634881 1.016087e+08 65.994563 2014-12-31 477.553256 481.363721 474.229922 478.036589 7.265242e+07 80.837674
# Turn the 'Date' index back into an ordinary column, in place, restoring a
# 0-based integer index.
df3. reset_index( inplace= True )
df3
Date Open High Low Close Volume Adj Close 0 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35 1 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97 2 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03 3 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48 4 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52 ... ... ... ... ... ... ... ... 8460 1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41 8461 1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40 8462 1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39 8463 1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42 8464 1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45
8465 rows × 7 columns
# Split each date into its components via the .dt accessor.
# weekday is numbered from Monday = 0 through Sunday = 6.
df3[ 'year' ] = df3[ 'Date' ] . dt. year
df3[ 'month' ] = df3[ 'Date' ] . dt. month
df3[ 'day' ] = df3[ 'Date' ] . dt. day
df3[ 'weekday' ] = df3[ 'Date' ] . dt. weekday
df3
Date Open High Low Close Volume Adj Close year month day weekday 0 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35 2014 7 8 1 1 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97 2014 7 7 0 2 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03 2014 7 3 3 3 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48 2014 7 2 2 4 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52 2014 7 1 1 ... ... ... ... ... ... ... ... ... ... ... ... 8460 1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41 1980 12 18 3 8461 1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40 1980 12 17 2 8462 1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39 1980 12 16 1 8463 1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42 1980 12 15 0 8464 1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45 1980 12 12 4
8465 rows × 11 columns
# Parse a single date string into a pandas Timestamp.
new_china = pd. to_datetime( '1949-10-01' )
new_china
Timestamp('1949-10-01 00:00:00')
# Year, month, day and weekday number (Monday = 0) of the timestamp, as a tuple.
new_china. year, new_china. month, new_china. day, new_china. weekday( )
(1949, 10, 1, 5)
# True when the date falls on Monday-Friday (weekday numbers 0-4), False on a weekend.
new_china. weekday( ) < 5
False