总结一下自己最近学习过程中,使用到的pandas和numpy操作,后面学习过程再慢慢补充:
pandas
import pandas as pd
# read file
df_csv = pd.read_csv(path)
#for txt file without header
df_txt = pd.read_table(path, header = None)
#切片
df[col] #get one column as series (df.ix was removed in pandas 1.0; use df[col] or df.loc[:, col])
df[[col1, col2]] #get columns as df (df.ix was removed in pandas 1.0; df.loc[:, [col1, col2]] also works)
#check info
df.head()
df.tail()
df.sample(10) #randomly pick 10 rows in df
df.describe() #summary statistics **for numerical columns**
df.info() #check index, column data types, non-null counts and memory usage
numpy
import numpy as np
np.count_nonzero(a) #count the nonzeros in numpy array a
np.tile(A, N) #repeat array A N times; N can also be a tuple giving the number of repetitions along each axis, e.g. (2, 3)
np.argmax(array, axis=1) #返回每一行最大元素的index
np.loadtxt() #Each row in the text file must have the same number of values
np.genfromtxt(filepath, filling_values=...) #可以处理文件中有缺失值的情况,默认缺失值被替代为nan,可以通过filling_values改变缺失值的替换