# Open the txt file and print every line as a list of comma-separated fields.
with open('day02.txt') as f:
    # Iterate the file object directly instead of loading all lines at once.
    for line in f:
        aline = line.strip()        # drop surrounding whitespace / newline
        bline = aline.split(",")    # split the line into its fields
        print(bline)
数据合并
# Merge `data` with `df` on their shared 'appid' column.
# The merged frame is not assigned to a name here.
pd.merge(left=data, right=df, on=['appid'])
数据聚合
# Group by the listed column(s); the `...` looks like a placeholder for further column names.
t_data = list1.groupby(['uid', 'appid', ...])
csv数据合并:
def hebing():
    """Append every CSV file in the current directory to 't_data.csv'.

    Each file matching '*.csv' is read whole and appended to 't_data.csv'.
    The output file itself is excluded from the inputs, so running the
    function again does not append 't_data.csv' to itself.

    NOTE(review): file contents are copied verbatim, so the first line of
    every input (presumably a header row) is copied as well -- confirm
    whether repeated header lines should be stripped before merging.
    """
    output_name = 't_data.csv'
    # On a re-run the output file matches '*.csv' too; leave it out.
    csv_list = [name for name in glob.glob('*.csv') if name != output_name]
    print(u'共发现%s个CSV文件' % len(csv_list))
    print(u'正在处理............')
    for name in csv_list:
        # Close the input promptly instead of leaking the file handle.
        with open(name, 'r') as src:
            content = src.read()
        # Keep the last row of this file from running into the first line
        # of the next one when the file lacks a trailing newline.
        if content and not content.endswith('\n'):
            content += '\n'
        with open(output_name, 'a') as f:
            f.write(content)
    print(u'合并完毕!')


def quchong(file):
    """Remove duplicate rows from the CSV at ``file``, rewriting it in place.

    Args:
        file: Path of the CSV file; its first line is treated as the header.
    """
    df = pd.read_csv(file, header=0)
    datalist = df.drop_duplicates()
    # index=False keeps the row index from being written out as an extra
    # unnamed column.
    datalist.to_csv(file, index=False)


if __name__ == '__main__':
    hebing()
    quchong("t_data.csv")
读取片段:
# Load the CSV and keep only its first ten rows.
df = pd.read_csv("annotations.csv").head(10)
按照列名读取
# Load only the three named columns from the CSV.
data = pd.read_csv(
    'rfm.csv',
    usecols=['appid', 'duration', 'avg_flow'],
)
要把第三列数据中的0值删除。今天弄了好几个小时,写了个循环还是不行;最后发现,只要筛选出该列中大于0的数据就可以了:
# Read three columns of the data.
data = pd.read_csv(
    'rfm.csv',
    usecols=['appid', 'duration', 'avg_flow'],
)
# Keep only the rows whose avg_flow is greater than 0.
data = data.loc[data['avg_flow'] > 0]
另一种思路,是把该列中数据为0的值挑出来,