pandas 120道练习题目
1-20 道题目
1 将下面的字典创建为DataFrame
import pandas as pd
import numpy as np
# Exercise 1: build the practice DataFrame from a dict of columns.
# Each column deliberately contains one missing value (np.nan).
languages = ["python", "c", "java", "go", np.nan, "sql", "python"]
scores = [1, 2, np.nan, 4, 5, 5, 7]
data = {"grammer": languages, "score": scores}
df = pd.DataFrame(data)
df
grammer score 0 python 1.0 1 c 2.0 2 java NaN 3 go 4.0 4 NaN 5.0 5 sql 5.0 6 python 7.0
2 提取含有字符串“python”的行
# Exercise 2 (exact match): keep rows whose 'grammer' value equals "python".
is_python = df["grammer"] == "python"
df[is_python]
grammer score 0 python 1.0 6 python 7.0
# Exercise 2 (substring match): keep rows whose 'grammer' value contains
# "python". str.contains returns NaN for missing values, which cannot be used
# in a boolean mask; na=False makes missing entries count as "no match", so
# the mask is boolean from the start and no in-place fillna is needed.
result = df["grammer"].str.contains("python", na=False)
df[result]
grammer score 0 python 1.0 6 python 7.0
3 输出df的所有列名
# Exercise 3: show the column labels as a pandas Index.
df.columns
Index(['grammer', 'score'], dtype='object')
# Exercise 3 (alternative): show the column labels as a plain Python list.
list(df.columns)
['grammer', 'score']
4 修改第二列列名为“分数”
# Exercise 4: rename the "score" column to "分数", modifying df in place.
new_names = {"score": "分数"}
df.rename(columns=new_names, inplace=True)
5 统计grammer列中每种编程语言出现的次数
# Exercise 5: count how often each language occurs (missing values are
# excluded by value_counts by default).
grammer_col = df["grammer"]
grammer_col.value_counts()
python 2
c 1
java 1
go 1
sql 1
Name: grammer, dtype: int64
6 将空值用上下值的平均值填充
# Exercise 6: fill missing scores by linear interpolation; for a single gap
# this is the mean of the values directly above and below it.
interpolated = df["分数"].interpolate()
df["分数"] = df["分数"].fillna(interpolated)
df
grammer 分数 0 python 1.0 1 c 2.0 2 java 3.0 3 go 4.0 4 NaN 5.0 5 sql 5.0 6 python 7.0
7 提取分数列中大于3的行
# Exercise 7: keep rows whose score is strictly greater than 3.
above_three = df["分数"] > 3
df[above_three]
grammer 分数 3 go 4.0 4 NaN 5.0 5 sql 5.0 6 python 7.0
8 按照grammer列去除重复值
# Exercise 8 (Series): unique 'grammer' values, keeping the first occurrence.
grammer_col = df["grammer"]
grammer_col.drop_duplicates()
0 python
1 c
2 java
3 go
4 NaN
5 sql
Name: grammer, dtype: object
# Exercise 8 (DataFrame): drop whole rows that repeat an earlier 'grammer'
# value; only that column is considered when detecting duplicates.
df.drop_duplicates(subset="grammer")
9 计算分数列的平均值
# Exercise 9: arithmetic mean of the score column.
score_col = df["分数"]
score_col.mean()
3.857142857142857
10 将grammer列转为list
# Exercise 10: convert the 'grammer' column to a plain Python list.
df["grammer"].tolist()
11 将DataFrame保存为excel
# Exercise 11: write the DataFrame to an Excel workbook at a relative path
# (resolved against the current working directory).
output_path = "test.xlsx"
df.to_excel(output_path)
12 查看数据行列数
# Exercise 12: (number of rows, number of columns).
df.shape
(7, 2)
13 提取分数列值大于3小于7的行
# Exercise 13: keep rows whose score lies strictly between 3 and 7.
above_three = df["分数"] > 3
below_seven = df["分数"] < 7
df[above_three & below_seven]
grammer 分数 3 go 4.0 4 NaN 5.0 5 sql 5.0
14 交换两列的位置
# Exercise 14: swap the two columns by selecting them in reversed order.
swapped_positions = [1, 0]
cols = df.columns[swapped_positions]
df = df[cols]
df
15 提取分数列最大值的行
# Exercise 15: rows whose score equals the column maximum (all ties are kept).
top_score = df["分数"].max()
df[df["分数"] == top_score]
16 查看最后5行数据
# Exercise 16: show the last five rows.
df.tail(n=5)
grammer 分数 2 java 3.0 3 go 4.0 4 NaN 5.0 5 sql 5.0 6 python 7.0
17 删除最后一行数据
# Exercise 17: delete the last row in place.
# DataFrame.drop removes rows by index *label*. Using len(df) - 1 only names
# the last row when the index is the default 0..n-1 range; after filtering or
# re-indexing it would raise KeyError or drop a different row. Looking up the
# final label via df.index[-1] works for any index.
last_label = df.index[-1]
df.drop(index=last_label, inplace=True)
df
grammer 分数 0 python 1.0 1 c 2.0 2 java 3.0 3 go 4.0 4 NaN 5.0 5 sql 5.0
18 添加一行数据[“perl”,6.6]
# Exercise 18: append one row to the end of the DataFrame.
# DataFrame.append is deprecated and was removed in pandas 2.0, so the new
# row is wrapped in a one-row DataFrame and joined with pd.concat instead.
# ignore_index=True renumbers the result 0..n-1, as the append call did.
# NOTE: re-running this cell adds the row again each time.
row = {"grammer": "perl", "分数": 6.6}
df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
df
C:\Users\86177\AppData\Local\Temp\ipykernel_22912\788235220.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
df=df.append(row,ignore_index=True)
grammer 分数 0 python 1.0 1 c 2.0 2 java 3.0 3 go 4.0 4 NaN 5.0 5 sql 5.0 6 perl 6.6 7 perl 6.6
19 对数据按照“分数”列值的大小进行排序
# Exercise 19: return a copy sorted by score in ascending order (df itself
# is left unchanged).
df.sort_values("分数", ascending=True)
grammer 分数 0 python 1.0 1 c 2.0 2 java 3.0 3 go 4.0 4 NaN 5.0 5 sql 5.0 6 perl 6.6 7 perl 6.6
20 统计grammer列每个字符串的长度
# Exercise 20: store the length of each 'grammer' string in a new column.
# Missing values are replaced with the placeholder "R" first, because len()
# cannot be applied to NaN.
df["grammer"] = df["grammer"].fillna("R")
df["len_str"] = df["grammer"].map(len)
df
grammer 分数 len_str 0 python 1.0 6 1 c 2.0 1 2 java 3.0 4 3 go 4.0 2 4 R 5.0 1 5 sql 5.0 3 6 perl 6.6 4 7 perl 6.6 4