# -*-coding:utf-8-*-
from pandas import DataFrame
import pandas as pd
import numpy as np
"""
获取行列数据
"""
df = DataFrame(np.random.rand(4, 5), columns=["A", "B", "C", "D", "E"])
print df
df["col_sum"] = df.apply(lambda x: x.sum(), axis=1) # 横向求和,axis=1表示横向
df.loc["row_sum"] = df.apply(lambda x: x.sum()) # loc获取一整列的数据,对一列数据进行求和
print df
dd = pd.DataFrame(np.arange(0, 60, 2).reshape(10, 3), columns=list("abc"))
# .loc selects by label.
print(dd)
# A bare ":" selects every row, i.e. the whole "a" column. This avoids
# passing len(dd) as a label that is not actually present in the index.
print(dd.loc[:, "a"])
# Label slices include both endpoints, so 0:3 yields four rows.
print(dd.loc[0:3, ["a", "b"]])
print(dd.loc[[1, 5], ["b", "c"]])
print("--------------------------------------")
# .iloc selects by integer position: a single cell or a rectangular region.
print(dd.iloc[1, 1])
# Positional slices exclude the end, so 0:3 yields three rows.
print(dd.iloc[0:3, [0, 1]])
print(dd.iloc[[0, 3, 5], 0:2])
print("--------------------------------------")
"""
去重函数 drop_duplicates()
"""
from pandas import Series, DataFrame
data = DataFrame({"k": [1, 1, 2, 2]})
print data
print type(data) #
isduplicates = data.duplicated() # duplicated()判断是否是重复的项