导包
import pandas as pd
import re
from matplotlib import pyplot as plt
# Set matplotlib's default font family for every figure drawn afterwards.
# NOTE(review): presumably chosen so Chinese labels render correctly — confirm
# that this font is installed on the machine running the script.
font = dict(family="Microsoft Yahei")
plt.rc("font", **font)
数据读取
# Load both CSV files (paths are relative to the current working directory),
# reading data01.csv first and data02.csv second.
data01, data02 = (pd.read_csv(path) for path in ("data01.csv", "data02.csv"))
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/dd1447d6b30448748f970ca634dba1e9.png)
表拼接
# Discard data02's first column, then stack the two tables row-wise.
# NOTE(review): the dropped column is presumably a stray index column that
# data01 does not have — confirm against the CSV files.
# Each input keeps its own row labels because ignore_index is not set.
data02 = data02.iloc[:, 1:]
df = pd.concat([data01, data02])
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/6df293f158a5f816f61f077f6ca6fda8.png)
去重
# Remove rows that are exact duplicates across all columns, modifying df in place.
df.drop_duplicates(inplace=True)
获取省
def get_province(x):
    """Return the province prefix of a work-location string.

    The province is everything up to and including the first "省"
    character, e.g. "广东省深圳市" -> "广东省".

    Args:
        x: A cell value from the work-location column. Normally a string,
            but may be a non-string such as the float NaN that pandas uses
            for missing values.

    Returns:
        The province name ending in "省", or "未识别" when ``x`` is not a
        string or contains no "省" character.
    """
    # Missing cells arrive as float NaN; the `in` test would raise TypeError
    # on them, so treat every non-string value as unrecognized.
    if not isinstance(x, str) or "省" not in x:
        return "未识别"
    # partition splits at the first "省", matching split("省")[0].
    return x.partition("省")[0] + "省"
# Build the "省份" (province) column by running get_province on every value
# of the "工作地" (work location) column.
df["省份"] = df["工作地"].apply(get_province)
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/88e4f4c3bded3e051e2227cccbc14d80.png)
获取市
def get_city(x):
if "市京市" in x:
return "市京市"
elif "市" in x:
if "省" in x:
return x.split("省")[1].split("市")[0]+"市"
else:
return x.split("市")[