# 使用pandas进行数据处理,解放人力!拒绝hp工作!
import pandas as pd
import numpy as np
# Column used to match rows between the two tables, and the columns whose
# values are copied across when a match is found.
KEY_COLUMN = '序号'
INTENT_COLUMNS = ('第一意向区域', '第二意向区域')


def fill_intent_regions(source, target, key=KEY_COLUMN, columns=INTENT_COLUMNS):
    """Copy ``columns`` from ``source`` into the rows of ``target`` sharing ``key``.

    Every row of both frames is considered (no hard-coded row counts), and
    the result is built with whole-column assignment rather than chained
    indexing (``df[col][row] = value``), which pandas does not guarantee to
    write through to the frame.

    Args:
        source: DataFrame providing the values; must contain ``key`` and
            every name in ``columns``.
        target: DataFrame to fill in; must contain ``key`` and every name
            in ``columns``. It is not modified.
        key: Name of the column used to match rows.
        columns: Names of the columns to copy on a match.

    Returns:
        A copy of ``target`` in which, for every row whose key also occurs
        in ``source``, the listed columns hold the source values. Rows
        without a match keep their existing values.

    Raises:
        KeyError: If ``key`` or one of ``columns`` is missing from either
            frame.
    """
    # A missing key can never match anything, so drop those rows up front.
    # When a key is repeated in ``source`` the last occurrence wins, which is
    # what overwriting row by row in file order would produce.
    lookup = (
        source.dropna(subset=[key])
        .drop_duplicates(subset=key, keep='last')
        .set_index(key)
    )

    result = target.copy()
    matched = result[key].isin(lookup.index)
    for column in columns:
        looked_up = result[key].map(lookup[column])
        # Take the looked-up value on matched rows only; elsewhere keep what
        # the target already had.
        result[column] = looked_up.where(matched, result[column])
    return result


if __name__ == '__main__':
    # Both inputs are CSV files stored in the gb18030 encoding.
    df1 = pd.read_csv('boy_115-215.csv', encoding='gb18030')
    df2 = pd.read_csv('boy_Final.csv', encoding='gb18030')

    # Optional clean-up of the key column, left disabled as in the original
    # script: fill blank cells, then convert the column to int.
    # df2['序号'] = df2['序号'].fillna(value=0).astype(int)

    # Show both inputs in full before merging.
    print(df1.to_string())
    print(df2.to_string())

    df2 = fill_intent_regions(df1, df2)

    print(df2.to_string())
    df2.to_csv('table3_new.csv')  # write the merged table to the working directory