1、Series1
import pandas as pd
# --- Series basics: construction, inspection and selection ---

# A Series built from a list gets the default integer index 0..4.
s1 = pd.Series([5, 17, 3, 26, 31])
s1.values  # the underlying array of values
s1.index  # the index labels
s1[2]  # one element, looked up by its label (labels are 0..4 here)
s1[1:3]  # integer slice: positional, end position excluded

# The same data with explicit string labels.
s1 = pd.Series([5, 17, 3, 26, 31], index=["a", "d", "b", "c", "e"])
s1["a"]
s1["d":"c"]  # label slice: both end labels are included
# Several labels must be passed as a list; `s1["a", "e", "c"]` would be
# treated as one tuple key and raise KeyError.
s1[["a", "e", "c"]]

# With integer labels, be explicit about label-based vs positional access.
s2 = pd.Series([5, 17, 3, 26, 31], index=[1, 3, 5, 7, 9])
s2.loc[1:3]  # by label: labels 1 and 3 (end included)
s2.iloc[1:3]  # by position: positions 1 and 2 (end excluded)

# A dict becomes a Series whose index is the dict keys.
s3 = pd.Series({"青菜": 4.1, "白萝卜": 2.2, "西红柿": 5.3, "土豆": 3.7, "黄瓜": 6.8})
"青菜" in s3  # membership tests the index labels, not the values
s3.loc["青菜"] = 4.5  # update by label
s3.iloc[0] = 4.5  # update by position
s3[s3 > 5]  # boolean-mask filtering
Series练习1
import pandas as pd
# Exercise: create Series objects, inspect them, then select and update values.

# Float data with the default integer index.
s1 = pd.Series(data=[-1.2, 3.7, 2.5, -8.2, 6.3])
s1.values
s1.index

# Integer labels in descending order: position and label now differ.
s2 = pd.Series(data=[1, 2, 3, 4, 5], index=[5, 4, 3, 2, 1])
s2.iloc[1]  # second element by position
s2.loc[5:3]  # label slice from label 5 through label 3 (inclusive)

# Scores keyed by student name.
s3 = pd.Series(
    data={"小李": 82, "小陈": 93, "小张": 91, "小曾": 78, "小邓": 68}
)
s3.at["小张"] = 95  # overwrite a single score by label
s3[s3.gt(80) & s3.lt(90)]  # scores strictly between 80 and 90
2、Series2
import pandas as pd
# --- Series arithmetic and summary statistics ---

# Two Series whose indexes only partly overlap.
s1 = pd.Series(data=[1, 4, 2, 3, 5], index=[1, 3, 5, 7, 9])
s2 = pd.Series(data=[8, 1, 7, 3, 9], index=[1, 2, 3, 5, 10])

# Plain addition aligns on labels; labels present on one side only give NaN.
s1.add(s2)

# The method forms accept fill_value, which stands in for a missing operand.
s1.add(s2, fill_value=0)
s1.sub(s2, fill_value=0)
s1.mul(s2, fill_value=1)
s1.div(s2, fill_value=1)

# Maximum, minimum, sum and mean, one value per output line.
print(s1.max(), s1.min(), s1.sum(), s1.mean(), sep="\n")

# Summary statistics in a single call.
s1.describe()

# Exam scores keyed by student name.
scores = pd.Series(
    data={"小明": 92, "小红": 67, "小杰": 70, "小丽": 88, "小华": 76}
)
def get_grade_from_score(score):
    """Map a numeric score to a letter grade.

    Args:
        score: A number comparable with integers.

    Returns:
        "A" for scores of 90 or more, "B" for 80 or more, "C" for 70 or
        more, and "D" for anything lower.
    """
    # Thresholds are checked from highest to lowest, so the first match wins.
    if score >= 90:
        return "A"
    if score >= 80:
        return "B"
    if score >= 70:
        return "C"
    return "D"
# Convert every score to its letter grade with the named function.
grades = scores.map(get_grade_from_score)
# Square every score with an inline lambda.
squared_scores = scores.map(lambda value: value * value)
Series练习2
import pandas as pd
# Exercise: combine two score Series, summarise, then add bonus points.

# Two score sets; only 小明 and 小杰 appear in both.
s1 = pd.Series(data={"小明": 92, "小红": 67, "小杰": 70, "小丽": 88, "小华": 76})
s2 = pd.Series(data={"小明": 95, "小杰": 85, "小宇": 60, "小娟": 79, "小彤": 76})

# Sum per student; a student missing from one Series counts as 0 there.
s1.add(s2, fill_value=0)

# Summary statistics of the first score set.
s1.describe()

# Give every student in s1 five extra points.
s1 = s1.add(5)
def s_grade(score):
    """Map a numeric score to a grade label.

    Args:
        score: A number comparable with integers.

    Returns:
        "A" for 90 or more, "B" for 80 or more, "C" for 70 or more,
        "D" for 60 or more, and "不及格" for anything below 60.
    """
    # Thresholds are checked from highest to lowest, so the first match wins.
    if score >= 90:
        return "A"
    if score >= 80:
        return "B"
    if score >= 70:
        return "C"
    if score >= 60:
        return "D"
    return "不及格"
# Grade label for every (bonus-adjusted) score in s1.
s3 = s1.map(s_grade)
s3
3、DataFrame1
# --- DataFrame basics: construction, attributes and selection ---

# Build a DataFrame from Series: each Series becomes one column.
s_id = pd.Series(["01", "02", "03", "04", "05"])
s_class = pd.Series(["二班", "一班", "二班", "三班", "一班"])
s_grade = pd.Series([92, 67, 70, 88, 76])
df1 = pd.DataFrame({"学号": s_id, "班级": s_class, "成绩": s_grade})

# Build a DataFrame from nested dicts: outer keys are columns, inner keys
# are row labels. Scores are stored as integers so the numeric comparisons
# further down work (string scores cannot be compared with 80).
df2 = pd.DataFrame(
    {
        "学号": {"小明": "01", "小红": "02", "小杰": "03", "小丽": "04", "小华": "05"},
        "班级": {"小明": "二班", "小红": "一班", "小杰": "二班", "小丽": "三班", "小华": "一班"},
        "成绩": {"小明": 92, "小红": 67, "小杰": 70, "小丽": 88, "小华": 76},
    }
)

# Basic attributes.
df2.index
df2.columns
df2.values
df2.T  # transpose: rows become columns

# Column selection.
df2["班级"]
df2.班级  # attribute access works when the column name is a valid identifier
df2[["学号", "成绩"]]

# Row selection: loc uses labels, iloc uses positions.
df2.loc["小丽"]
df2.iloc[3]
df2.loc["小红":"小丽"]  # label slice, end label included
df2.iloc[1:3]  # positional slice, end position excluded
df2.loc[["小红", "小丽"]]
df2.iloc[[3, 1]]

# Row and column selection together.
df2.loc["小杰", "学号"]
df2.iloc[2, 0]
df2.loc["小红":"小杰", "班级":"成绩"]
df2.iloc[1:3, 1:3]
df2.loc[:, "班级":"成绩"]
df2.iloc[:, 1:3]
df2.loc[["小红", "小丽"], "学号":"班级"]
df2.iloc[[1, 3], 0:2]

# Boolean filtering; combine conditions with & and parenthesise each one.
df2[df2["成绩"] > 80]
df2[(df2["成绩"] > 80) & (df2["班级"] == "三班")]

# First rows: five by default, or the requested number.
df2.head()
df2.head(2)
DataFrame练习1
import pandas as pd
# Exercise: assemble a student table from three Series and query it.

# The three Series share student-id labels; `gender` lists them in reverse
# order and `height` has no entry for "006".
name = pd.Series(
    data={"001": "小陈", "002": "小李", "003": "小王", "004": "小张", "005": "小赵", "006": "小周"}
)
gender = pd.Series(
    data={"006": "女", "005": "女", "004": "男", "003": "男", "002": "女", "001": "男"}
)
height = pd.Series(
    data={"001": 172.5, "002": 168.0, "003": 178.2, "004": 181.3, "005": 161.7}
)

# Columns are aligned on the index labels; a missing label becomes NaN.
students = pd.DataFrame(data={"姓名": name, "性别": gender, "身高": height})
students

# Attributes.
students.index
students.columns
students.transpose()

# Column selection.
students.loc[:, "身高"]
students.loc[:, ["性别", "身高"]]

# Row selection by label.
students.loc["003"]
students.loc["003":"005"]

# Row and column selection together.
students.at["005", "身高"]
students.loc[["003", "005"], "姓名":"身高"]
students.loc[["003", "005"]]

# Female students taller than 165.
students[students["身高"].gt(165) & students["性别"].eq("女")]

# First five rows.
students.head(5)
4、DataFrame2
import pandas as pd
# --- DataFrame editing: add/replace columns and rows, drop, arithmetic ---

name = pd.Series({"001": "小陈", "002": "小李", "003": "小王", "004": "小张", "005": "小赵", "006": "小周"})
gender = pd.Series({"006": "女", "005": "女", "004": "男", "003": "男", "002": "女", "001": "男"})
height = pd.Series({"001": 172.5, "002": 168.0, "003": 178.2, "004": 181.3, "005": 161.7, "006": 159.8})
grade = pd.Series({"001": 89, "002": 92, "003": 82, "004": 96, "005": 93, "006": 84})
df1 = pd.DataFrame({"姓名": name, "性别": gender, "身高": height, "成绩": grade})

# Replace a column: a Series is aligned on the index, a list is positional.
df1["成绩"] = pd.Series([90, 91, 83, 95, 94, 85], index=["001", "002", "003", "004", "005", "006"])
df1["成绩"] = [90, 91, 83, 95, 94, 85]

# Assigning to a new column name appends the column.
df1["班级"] = ["一班", "三班", "二班", "三班", "一班", "二班"]

# Replace a row: a Series is aligned on the column names, a list is positional.
df1.loc["005"] = pd.Series(["小赵", "女", 162.7, 95, "一班"], index=["姓名", "性别", "身高", "成绩", "班级"])
df1.loc["005"] = ["小赵", "女", 162.7, 95, "一班"]

# drop returns a new frame: rows by default, columns with axis=1.
df1 = df1.drop(["003", "006"])
df1.drop("姓名", axis=1)
df1.drop(["身高", "性别"], axis=1)

# Arithmetic between frames aligns on both row and column labels.
# It is shown on the numeric columns only, because filling a missing text
# cell with a number and then adding/subtracting raises TypeError.
df2 = pd.DataFrame({"身高": [1.0, 2.0], "成绩": [5, 3]}, index=["001", "007"])
df1_numeric = df1[["身高", "成绩"]]
# fill_value stands in for a cell that exists in only one frame: 0 for
# add/sub and 1 for mul/div, so the lone operand passes through unchanged.
df1_numeric.add(df2, fill_value=0)
df1_numeric.sub(df2, fill_value=0)
df1_numeric.div(df2, fill_value=1)
df1_numeric.mul(df2, fill_value=1)

# Multiplying text columns by an integer repeats each string.
df1[["姓名", "性别"]] * 5
DataFrame练习2
import pandas as pd
# Exercise: extend an exam table with a new column and a new row, then
# adjust scores.

# The outer dict keys are student ids, so the frame is transposed to put one
# student on each row.
students = pd.DataFrame(
    data={
        "001": {"姓名": "小陈", "考试1": 85, "考试2": 95, "考试3": 92},
        "002": {"姓名": "小李", "考试1": 91, "考试2": 92, "考试3": 94},
        "003": {"姓名": "小王", "考试1": 86, "考试2": 81, "考试3": 89},
        "004": {"姓名": "小张", "考试1": 79, "考试2": 89, "考试3": 95},
        "005": {"姓名": "小赵", "考试1": 96, "考试2": 91, "考试3": 91},
        "006": {"姓名": "小周", "考试1": 81, "考试2": 81, "考试3": 92},
    }
).transpose()

# Append a fourth exam as a new column.
students["考试4"] = [72, 69, 79, 83, 82, 76]

# Add student "007": first from a labelled Series, then from a plain list.
students.loc["007"] = pd.Series(
    data=["小杨", 79, 82, 81, 69],
    index=["姓名", "考试1", "考试2", "考试3", "考试4"],
)
students.loc["007"] = ["小杨", 79, 82, 81, 69]

# Per-exam bonus points; the Series labels are matched to the column names.
bonus = pd.Series(data={"考试1": 2, "考试2": 3, "考试3": 2, "考试4": 5})
students[["考试1", "考试2", "考试3", "考试4"]].radd(bonus)

# Raise every exam-4 score by 10 and store the result.
students["考试4"] = students["考试4"].add(10)
students
5、DataFrame3
import pandas as pd
# --- DataFrame statistics ---

# Exam scores keyed by student id; transposed so each student is one row.
students = pd.DataFrame(
    data={
        "001": {"考试1": 85, "考试2": 95, "考试3": 92},
        "002": {"考试1": 91, "考试2": 92, "考试3": 94},
        "003": {"考试1": 86, "考试2": 81, "考试3": 89},
        "004": {"考试1": 79, "考试2": 89, "考试3": 95},
        "005": {"考试1": 96, "考试2": 91, "考试3": 91},
        "006": {"考试1": 81, "考试2": 81, "考试3": 92},
    }
).transpose()

# Mean of each exam (one value per column).
students.mean(axis=0)
# Mean of each student (one value per row).
students.mean(axis="columns")
def trim_mean(data):
    """Return the mean of *data* after removing one maximum and one minimum.

    Args:
        data: An object that supports ``len()`` and provides ``sum()``,
            ``max()`` and ``min()`` methods, such as a pandas Series.

    Returns:
        ``(sum - max - min) / (len - 2)``.

    Note:
        The divisor is ``len(data) - 2``, so *data* needs more than two
        values for the result to be a meaningful average.
    """
    data_len = len(data)
    data_sum = data.sum()
    max_num = data.max()
    min_num = data.min()
    # One highest and one lowest value are excluded from both the total and
    # the count.
    return (data_sum - max_num - min_num) / (data_len - 2)
# apply passes each column to the function by default ...
students.apply(trim_mean)
# ... and each row when axis=1.
students.apply(trim_mean, axis=1)

# Element-wise transform. DataFrame.applymap is deprecated since pandas 2.1
# in favour of DataFrame.map, so use map when this pandas provides it and
# fall back to applymap on older versions.
_elementwise = students.map if hasattr(type(students), "map") else students.applymap
_elementwise(lambda x: x + 5)

# Summary statistics for every numeric column.
students.describe()
DataFrame练习3
import pandas as pd
import numpy as np
# Exercise: per-student averages and the second-highest score of each exam.

# Student records keyed by id; transposed so each student is one row.
students = pd.DataFrame(
    data={
        "001": {"姓名": "小陈", "考试1": 85, "考试2": 95, "考试3": 92},
        "002": {"姓名": "小李", "考试1": 91, "考试2": 92, "考试3": 94},
        "003": {"姓名": "小王", "考试1": 86, "考试2": 81, "考试3": 89},
        "004": {"姓名": "小张", "考试1": 79, "考试2": 89, "考试3": 95},
        "005": {"姓名": "小赵", "考试1": 96, "考试2": 91, "考试3": 91},
        "006": {"姓名": "小周", "考试1": 81, "考试2": 81, "考试3": 92},
    }
).transpose()

# Average of the three exams for every student (row-wise mean).
average = students[["考试1", "考试2", "考试3"]].mean(axis="columns")
name = students.loc[:, "姓名"]

# Two-column table: the name column plus the matching average.
student_average = name.to_frame().assign(**{"平均分": average})
student_average

# Second-highest score of each exam: sort ascending, take the one before last.
students.loc[:, "考试1":"考试3"].apply(lambda column: np.sort(column)[-2])
def average_students(average):
    """Map a numeric score to a letter grade.

    Args:
        average: A number comparable with integers.

    Returns:
        "A+" for 95 or more, "A" for 90 or more, "B+" for 85 or more,
        "B" for 80 or more, "C+" for 75 or more, and "C" otherwise.
    """
    # Thresholds are checked from highest to lowest, so the first match wins.
    if average >= 95:
        return "A+"
    if average >= 90:
        return "A"
    if average >= 85:
        return "B+"
    if average >= 80:
        return "B"
    if average >= 75:
        return "C+"
    return "C"
# Letter grade for every individual exam score. DataFrame.applymap is
# deprecated since pandas 2.1 in favour of DataFrame.map, so use map when
# this pandas provides it and fall back to applymap on older versions.
_exam_scores = students.loc[:, "考试1":"考试3"]
_elementwise = _exam_scores.map if hasattr(type(_exam_scores), "map") else _exam_scores.applymap
_elementwise(average_students)

# Cast the exam columns to integers before summarising them.
students["考试1"] = students["考试1"].astype("int")
students["考试2"] = students["考试2"].astype("int")
students["考试3"] = students["考试3"].astype("int")
students.describe()