# 1. To do data mining in Python you must be fluent with the scientific-computing libraries; I started by practising some concrete pandas operations.
#-*- encoding:utf-8 -*-
import numpy as np
import os
import pylab as pl
import pandas as pd
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
# s = pd.Series([1, 2, 3, np.nan, 4, 5])
# # print s
# dates = pd.date_range('20171001',periods=6)
# # print dates
# df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
# # index: row labels; columns: column labels
# # print df
# df2 = pd.DataFrame({
# 'A':20.,
# 'B':pd.Timestamp('20171026'),
# 'C':pd.Series(1,index=list(range(4)),dtype='float32'),
# 'D':np.array([3] * 4 ,dtype = 'int32'),
# 'E':pd.Categorical(['a','b','c','d']),
# 'F':'chen'
# })
# print df2
# print df2.dtypes
# print df.head()  # first rows (5 by default)
# print df.tail()  # last rows (5 by default)
# print df.index  # row index
# print df.columns
# print df.values
# print df.describe()  # summary statistics (count, mean, std, min, quartiles, max)
# print df.T  # transpose
# print df.sort_index(axis=1,ascending=False)  # sort the columns A-D by label, descending
# print df.sort_values(by='B')  # reorder the rows by the values in column B
# print df['A']
# print df[0:3]  # first three rows
# print df['20171001':'20171003']
# print df.loc[dates[0]]  # selects the first row; it is returned as a Series, so it prints vertically
# print df.loc[:,['A','B']]
# print df.loc[dates[0],['A','B']]  # dates[i] picks which row's data to select
# print df.loc['20171001