# 从“早起Python”公众号看到该文章,把内容拿下来练习一下,部分题目加了新解法。更多内容及标准答案请参考作者公众号,感谢作者的分享。
# 文章原地址:https://mp.weixin.qq.com/s/fvEDcAqWZ-HkoTMxi995dw
# 数据源下载链接:https://pan.baidu.com/s/1MSqmWMiurHJSXyNBlJOEpw 密码:8mkx
# 一些输出结果在调试时有更改,不一定一一对应。
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
# Select matplotlib's 'ggplot' style sheet.
plt.style.use('ggplot')
# Exercise: build a DataFrame from the dictionary below.
# Each of the two columns contains one np.nan entry.
data = dict(
    grammer=["Python", "C", "Java", "GO", np.nan, "SQL", "PHP", "Python"],
    score=[1, 2, np.nan, 4, 5, 6, 7, 10],
)
df = pd.DataFrame.from_dict(data)
df  # notebook-style display of the result
# Exercise: select the rows that contain the string "Python".
# Exact match on the whole cell.
df.loc[df['grammer'].eq('Python')]
# Substring match; na=False makes the NaN cell count as "no match".
# Several alternatives can be matched at once with a pattern such as
# 'Python|Java|SQL'.
df.loc[df['grammer'].str.contains('Python', na=False)]
# Rows in which any column holds one of the listed values.
df.loc[df.isin(['Python','SQL']).any(axis=1)]
# Exercise: output all column names of df.
# As a plain Python list.
list(df.columns)
# As the underlying pandas Index object.
df.columns
# Exercise: rename the second column to 'popularity'.
# Approach 1: replace the complete list of column labels.
df.columns = ['grammer', 'popularity']
# Approach 2: rename by label. After approach 1 there is no 'score' label
# left, so this call leaves the frame unchanged here.
df.rename({'score':'popularity'}, axis='columns', inplace=True)
df
# Exercise: count how many times each language occurs in the grammer column.
df['grammer'].value_counts()
# groupby(...).size() counts the rows in each group. groupby(...).count()
# would instead count the non-null cells of the remaining columns, so a
# language whose popularity is NaN would be reported with 0 occurrences.
df.groupby('grammer').size()
# Exercise: fill the missing value with the mean of the values above and below.
# Interpolation places a lone missing value halfway between its two
# neighbours. The filled Series is only displayed; df itself is not modified.
df['popularity'].interpolate()
# Exercise: select the rows whose popularity is greater than 3.
# Boolean-mask form.
df.loc[df['popularity'].gt(3)]
# Same filter written as a query expression.
df.query('popularity>3')
# Exercise: de-duplicate by the grammer column.
# Distinct values collected into a Python set.
set(df['grammer'])
# The column with repeated values removed, keeping each first occurrence.
df['grammer'].loc[~df['grammer'].duplicated()]
# Exercise: compute the mean of the popularity column.
# Disabled per-language variants kept from the original notes:
#df.groupby('grammer')['popularity'].mean()
#df.groupby('grammer').apply(lambda x:x.mean())
# Missing values are replaced by 0 before averaging, so those rows still
# take part in the average.
# NOTE(review): Series.mean() presumably skips NaN by default, which would
# give a different result -- confirm that zero-filling is intended.
df['popularity'].fillna(0).mean() # mind how NaN values are handled
# Exercise: convert the grammer column to a Python list.
df.loc[:, 'grammer'].tolist()
# Exercise: save the DataFrame as EXCEL.
# NOTE(review): the call below writes a CSV file, not an Excel workbook, and
# the target is a hard-coded absolute path on one user's desktop -- it will
# presumably fail on any machine without that directory; confirm before reuse.
df.to_csv('C://Users//JR//Desktop//测试.csv')
df
# Exercise: inspect the number of rows and columns.
# Printed summary of the frame.
df.info()
# (row count, column count) as a tuple.
(len(df.index), len(df.columns))
# Exercise: select rows whose popularity is greater than 3 and less than 7.
# Both bounds are exclusive.
df.loc[df['popularity'].gt(3) & df['popularity'].lt(7)]
# Same filter as a chained comparison inside a query expression.
df.query('3<popularity<7')
# Exercise: swap the positions of the two columns.
# Step 1: exchange the contents of the two columns; the labels stay put.
tmp = df['grammer']
df['grammer'], df['popularity'] = df['popularity'], tmp
# Step 2: exchange the labels too. Every label is back on its own data, but
# the left-to-right order of the columns is now flipped.
df.rename(columns=dict(grammer='popularity', popularity='grammer'), inplace=True)
# Display-only alternative: reverse the column order by position.
df.iloc[:, ::-1]
df
# Exercise: select the row(s) holding the maximum popularity value.
df.loc[df['popularity'].eq(df['popularity'].max())]
# Exercise: view the last 5 rows.
# Dedicated helper with the row count spelled out.
df.tail(5)
# Positional slice over rows, all columns.
df.iloc[-5:, :]
# Plain row slice on the frame.
df[-5:]
# Exercise: delete the last row. Results are only displayed; df is unchanged.
# Take the label of the last row from the index itself. Using a column's
# non-null count as the label only hits the last row of a 0..n-1 index when
# that column has exactly one NaN; otherwise it raises KeyError or removes
# the wrong row.
df.drop(df.index[-1])
# For contrast: axis=1 removes a column, not a row.
df.drop(['popularity'],axis=1)
# Positional alternative that does not depend on the index labels at all.
df.iloc[:-1]
df
# Exercise: add the row ['Perl', 6.6]. The result is displayed, not stored.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so wrap
# the new row in a one-row frame and concatenate it instead.
# The row is given as a dict so that every value is matched to its column by
# name; the keys must therefore be the existing column labels.
pd.concat(
    [df, pd.DataFrame([{'popularity': 6.6, 'grammer': 'Perl'}])],
    ignore_index=True,
)
# Exercise: sort the data by the values of the "popularity" column.
# Largest value first; the sorted copy is only displayed.
df.sort_values(by='popularity', ascending=False)
# Exercise: compute the length of every string in the grammer column.
# Missing entries are turned into '' first, so their length is stored as 0.
df['len'] = df['grammer'].fillna('').map(len)
df['len'] = df['gra