Python pandas㈠练习
一、处理网上招聘数据
import pandas as pd
# Exercise 1: clean scraped job-listing data and rank cities.
# The job-count and salary columns arrive as text such as '1.2K' or '910'.
results = pd.DataFrame({
    '城市': ['武汉', '上海', '深圳', '广州', '苏州', '南京', '成都', '北京', '杭州'],
    'Python职位数': ['1.2K', '7.9K', '6.4K', '2.5K', '910', '1.3K', '1.6K', '8.7K', '2.5K'],
    '平均月薪': ['12.0K', '17.2K', '16.8K', '12.8K', '12.8K', '13.3K', '11.6K', '19.2K', '15.5K'],
})
# Append one more row; label 9 continues the default 0..8 index.
results.loc[9] = ['西安', '664', '12.9K']
results


def _parse_k_number(text):
    """Convert a textual amount such as '1.2K' or '910' to an integer.

    A 'K' in the text means the number is expressed in thousands.
    ``round`` is used rather than ``int`` truncation so that binary
    floating-point noise in the multiplication cannot lose a unit.
    Values without 'K' are also rounded to the nearest integer.
    """
    text = str(text)
    if 'K' in text:
        return round(float(text.replace('K', '')) * 1000)
    return round(float(text))


# Convert each text column to integers and write it back to the SAME column.
# Whole-column assignment avoids element-wise writes into a Series taken out
# of the frame, which trigger chained-assignment warnings and may not
# propagate back to the frame.
results['Python职位数'] = results['Python职位数'].map(_parse_k_number).astype('int64')
results['平均月薪'] = results['平均月薪'].map(_parse_k_number).astype('int64')

# Rank by average monthly salary, highest first, and keep the top five.
results.sort_values(by="平均月薪", axis=0, ascending=False, inplace=True)
top5_salary = results.loc[:, ['城市', '平均月薪']].head(5)
top5_salary

# Rank by number of Python positions, highest first, and keep the top five.
# `results` is left sorted by this column.
results.sort_values(by="Python职位数", axis=0, ascending=False, inplace=True)
top5_jobs = results.loc[:, ['城市', 'Python职位数']].head(5)
top5_jobs
二、案例实现
如下表(人员信息表)所示,已知一些人员的年龄(age)、性别(gender)和月薪(salary)信息。
import pandas as pd
# Exercise 2: build a small personnel table with one row per person.
columns = ['age', 'gender', 'salary']
data = [
    [25, 'female', 4000],
    [30, 'male', 8000],
    [22, '/', 8000],
    [28, 'male', 5000],
]
people = pd.DataFrame(data, columns=columns)

# Q1: overwrite the '/' gender value stored in the row labelled 2.
people.at[2, 'gender'] = 'female'
print(people)

# Q2: append a new person under index label 4.
people.loc[4] = [35, 'male', 1000]
print(people)

# Q3: order the table by salary, highest first, then select labels 1..3.
people.sort_values('salary', ascending=False, inplace=True)
print(people)
# NOTE(review): this is a label-based slice taken after sorting, so the rows
# it returns depend on where labels 1 and 3 ended up. If "top three by
# salary" was intended, confirm and consider a positional selection.
people.loc[1:3, columns]

# Q4: rows whose gender is 'male'.
is_male = people['gender'].eq('male')
people[is_male]

# Q5: rows whose age is 28 or 35.
wanted_age = people['age'].isin([28, 35])
people[wanted_age]