"""Pandas basics: label alignment, sorting, ranking, reductions, NaN removal.

Tutorial-style script. The commented-out ``print`` calls are the individual
demonstrations; enable them one at a time to inspect the result.
"""
from pandas import Series, DataFrame
import numpy as np
from numpy import nan as NA

# Arithmetic between pandas objects aligns on labels. The result carries the
# union of both indexes; a label present on only one side yields NaN.
a1 = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
a2 = Series([1, -2, 3, 4], index=list('acef'))

d1 = DataFrame(np.arange(12).reshape(3, 4), columns=list('abcd'))
d2 = DataFrame(np.arange(20).reshape(4, 5), columns=list('abcde'))

# fill_value stands in for a label that is missing on one side only.
# print(d1.add(d2, fill_value=0))

# Give d1 the same columns as d2; the newly created column 'e' is set to 0.
d1 = d1.reindex(columns=d2.columns, fill_value=0)

# Sorting by value versus sorting by index label.
s1 = Series([1, 3, 5, 2, 4], index=list('abcde'))
# print(s1.sort_values())
# print(s1.sort_index())

# rank() assigns each element its position in sorted order; by default tied
# values share the average of the positions they occupy.
s2 = Series([1, 3, 2, 3, 2, 4, 4, 4])
# print(s2.rank())

# Sum of all elements.
# print(s2.sum())
# Sum of each column (NaN values are skipped).
# print(d1, d1.sum())
# Sum of each row.
# print(d1, d1.sum(axis=1))

# Running total accumulated down each column.
# print(d1.cumsum())

# Drop rows that contain NaN.
d3 = DataFrame([[1, 2, 3, NA, 4],
                [1, 2, 3, 4, NA],
                [1, 2, 3, NA, 4],
                [1, 2, 3, 4, NA]])
# NOTE: every row of d3 holds one NaN, so the default dropna() removes all of
# them and `cleaned` ends up with zero rows.
cleaned = d3.dropna()
# ----------------------------------------------------------------------
# Reading and writing CSV files with pandas.
import pandas as pd
import matplotlib.pyplot as plt
from pandas import Series, DataFrame
import numpy as np

# Read a CSV file with the default settings.
d1 = pd.read_csv(r'd1.csv')

# read_table reads the same file once the separator is given explicitly.
d2 = pd.read_table(r'd1.csv', sep=',')

# Supply the column names ourselves. When `names` is passed and `header` is
# not, pandas behaves as if header=None, so the file's first line is data.
d3 = pd.read_csv(r'd1.csv', names=list('abcde'))

# Same as above, and use column 'd' as the row index.
d4 = pd.read_csv(r'd1.csv', names=list('abcde'), index_col='d')

# Read only the first 3 rows of data.
d5 = pd.read_csv(r'd1.csv', nrows=3)

# Save as a CSV file without writing the row index.
# d1.to_csv('d2.csv', index=False)

# NOTE(review): the only statement that writes 'd2.csv' is the commented-out
# to_csv call above, so the file presumably has to exist from an earlier run
# - confirm. header=None makes pandas number the columns and read the file's
# first line as data.
d6 = pd.read_csv(r'd2.csv', header=None)