"""Demo script: different ways to iterate over a pandas DataFrame.

Loads the first few rows of the ``music_meta`` file and shows row-wise
iteration (``iterrows`` / ``itertuples``) versus column-wise iteration
(``items``), followed by a small lookup exercise on ``item_id``.
"""
import os

import pandas as pd
from sklearn.model_selection import train_test_split

from recall import config

# 1. Iterating over a DataFrame

# Show every column when printing a DataFrame.
pd.set_option('display.max_columns', None)

data_path = '../raw_data'
meta_music = "../raw_data/music_meta"

df = pd.read_csv(os.path.join(data_path, 'music_meta'),
                 sep="\001",
                 names=['item_id', 'item_name', 'desc', 'total_len', 'loc', 'tags'],
                 nrows=4)
# fillna() returns a new frame by default, so the result must be re-assigned.
df = df.fillna('-')

print("---几种不同的迭代方式,根据不同要求取舍-----")

print("-------------- 1 -----------------")
# iterrows() yields (index, Series) pairs, one pair per row.
for _, record in df.iterrows():
    print(record['total_len'])
    break

print("-------------- 2 -----------------")
# itertuples() yields one namedtuple per row; field 0 is the index.
for row in df.itertuples():
    print(row[0], " ", row[1], " ", row[2], " ", row[3])
    break

print("-------------- 3 -----------------")
# items() yields (column name, Series) pairs, one pair per column.
# (DataFrame.iteritems() was removed in pandas 2.0; items() is equivalent.)
for _, column in df.items():
    # Third value of the column, selected explicitly by position.
    print(column.iloc[2])
    break

# Note: iterrows() and items() differ -- the former walks over rows,
# the latter walks over columns.
print("-------------------------")
print(df.head(1))
print("-------------------------")

# Exercise: find the row whose item_id is 29900100.
# Rows (not columns) are iterated so that every item_id is checked.
target_item_id = 29900100
for _, record in df.iterrows():
    item_id = record['item_id']
    # A scalar has no meaningful .dtype to inspect here; use type() instead.
    print(type(item_id))
    if item_id == target_item_id:
        print("哈哈!!!找到了")