import pandas as pd
import numpy as np
print("************Series*************")
print("=========从Array 生成 Series===========")
# Build a Series from a plain Python list. Because of the np.nan entry the
# values are stored as float64; the index labels default to 0..n-1.
s = pd.Series([1, 3, 6, np.nan, 44, 1])
print(s)
# Build a DatetimeIndex of 6 consecutive days starting at 2019-01-01.
# Sample output (every line must stay commented, otherwise the script
# no longer parses):
# DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
#                '2019-01-05', '2019-01-06'],
#               dtype='datetime64[ns]', freq='D')
dates = pd.date_range('20190101', periods=6)
print(dates)
print("************DataFrame*************")
print("==========从矩阵生成DataFrame=============")
# Wrap a 6x4 matrix of random samples in a DataFrame: `index` supplies the
# row labels (the `dates` built earlier in the script) and `columns` the
# column labels.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list("abcd"),
)
print(df)
# Sample output (the numbers differ on every run):
#                    a         b         c         d
# 2019-01-01  1.909770  0.182445  1.301455  0.866027
# 2019-01-02  0.580548 -2.191395  0.668358 -0.866770
# 2019-01-03  1.937158 -0.715154 -0.476099  0.379382
# 2019-01-04 -0.402467  0.821941 -0.322327  0.583404
# 2019-01-05 -0.961753  1.265157  0.917990  0.454411
# 2019-01-06 -1.617632 -1.184158 -0.348426 -0.396559
print("==========从Map字典生成 DataFrame=============")
# Build a DataFrame from a dict: every key becomes a column. Scalar entries
# (A, B, F) are repeated for each of the 4 rows defined by the Series,
# array and Categorical entries.
df1 = pd.DataFrame(
    data={
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(['test', 'trait', 'test', 'train']),
        'F': 'foo',
    }
)
print(df1)
# Sample output:
#      A          B    C  D      E    F
# 0  1.0 2013-01-02  1.0  3   test  foo
# 1  1.0 2013-01-02  1.0  3  trait  foo
# 2  1.0 2013-01-02  1.0  3   test  foo
# 3  1.0 2013-01-02  1.0  3  train  foo
print(df1.dtypes)
# Sample output from the original run (exact dtype names for B and F may
# differ between pandas versions):
# A           float64
# B    datetime64[ns]
# C           float32
# D             int32
# E          category
# F            object
# dtype: object
print("=======打印行名、列名、值=========")
# Print the row labels, the column labels and the underlying value array,
# one after another, each starting on its own line.
# Sample output from the original run (newer pandas versions may render
# the index differently):
# Int64Index([0, 1, 2, 3], dtype='int64')
# Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')
# [[1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
#  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'trait' 'foo']
#  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
#  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']]
print(df1.index, df1.columns, df1.values, sep="\n")
print("=============DataFrame 的基本描述==================")
# Summary statistics per column: count, mean, standard deviation, min,
# quartiles and max. In the original run only the numeric columns A, C and
# D were summarized (which columns are included may depend on the pandas
# version -- TODO confirm for the version in use).
print(df1.describe())
# Sample output:
#          A    C    D
# count  4.0  4.0  4.0
# mean   1.0  1.0  3.0
# std    0.0  0.0  0.0
# min    1.0  1.0  3.0
# 25%    1.0  1.0  3.0
# 50%    1.0  1.0  3.0
# 75%    1.0  1.0  3.0
# max    1.0  1.0  3.0
print("==========DataFrame转置============")
# Transpose: rows become columns and columns become rows.
print(df1.transpose())
print("==========DataFrame排序,按照列名、行名 、 值排序============")
# Sort by the column labels (axis 'columns' is the same as axis=1);
# ascending=False gives descending order, i.e. F first and A last.
print(df1.sort_index(axis='columns', ascending=False))
# Sample output:
#      F      E  D    C          B    A
# 0  foo   test  3  1.0 2013-01-02  1.0
# 1  foo  trait  3  1.0 2013-01-02  1.0
# 2  foo   test  3  1.0 2013-01-02  1.0
# 3  foo  train  3  1.0 2013-01-02  1.0
#
# Sort the rows by the values in column 'E'.
print(df1.sort_values('E'))
# Sample output:
#      A          B    C  D      E    F
# 0  1.0 2013-01-02  1.0  3   test  foo
# 2  1.0 2013-01-02  1.0  3   test  foo
# 3  1.0 2013-01-02  1.0  3  train  foo
# 1  1.0 2013-01-02  1.0  3  trait  foo