# -*- coding: utf8 -*-
import numpy as np
import pandas as pd
from pandas import * #主要有Series, DataFrame两个数据结构
import matplotlib.pyplot as plt
from matplotlib.pyplot import savefig
from pylab import *
# Configure matplotlib so Chinese labels and the minus sign display properly.
mpl.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used to render Chinese text
    'axes.unicode_minus': False,    # keep the minus sign displayable
})
# Step 1: Series
# General constructor form: s = Series(data, index=index, name=name)
a = np.random.rand(5)
print("a is an array", a, sep="\n")
b = Series(data=a, index=list('abcde'), name='random_5')
print("b is a Series", b, sep="\n")
print("...........................")
# A dict can be turned into a Series directly; its keys become the index.
d = dict(a=0., b=1, c=2)
print("d is a dict:", d, sep="\n")
s = Series(data=d)
print("s is a Series:", s, sep="\n")
print("...........................")
# Step 2: DataFrame
# A DataFrame is a two-dimensional structure made by joining several Series
# column-wise.
# Build one from a dict of Series: keys become column names, and rows are
# aligned on the union of the Series indexes ('one' has no value for 'e').
f = {
    'one': Series([1, 2, 3, 4], index=list('abcd')),
    'two': Series([1, 2, 3, 4, 5], index=list('abcde')),
}
df = DataFrame(data=f)
print(df)
print("...........................")
# Pick values by label: only the requested rows/columns are kept, in the
# requested order; labels absent from the data ('r', 'three') come out as NaN.
dg = DataFrame(data=f, index=['r', 'd', 'a'], columns=['two', 'one', 'three'])
print(dg)
print("...........................")
# Build DataFrames with pd.concat.
index = ['a', 'b', 'c', 'd', 'e']
a = Series(range(5), index=index)
# np.linspace(start, stop, num) yields `num` evenly spaced values including
# both end points, e.g. np.linspace(1, 100, 40) gives 40 values from 1 to 100.
b = Series(np.linspace(4, 20, 5), index=index)
c = Series(np.linspace(4, 20, 5), index=index)
d = Series(np.linspace(1, 20, 5), index=index)
dfa = pd.concat([a, b, c], axis=1)  # axis=1 joins the Series as columns
dfa1 = pd.concat([d, c], axis=1)
# concat leaves the columns numbered 0..n-1, so overwrite them with real names.
dfa.columns = ['lu', 'zao', 'fa']
dfa1.columns = ['aq', 'fa']
print(dfa)
print("...........................")
# All rows of the first and third columns, selected by position.
# `.iloc` replaces the `.ix` indexer, which was removed in pandas 1.0 and
# raises AttributeError on current versions.
h = dfa.iloc[:, [0, 2]]
print(h)
print("...........................")
# Date helpers.
# Five consecutive days starting 2015-01-01, labelled with the `index` list
# defined earlier in the script.
dates = Series(data=pd.date_range('20150101', periods=5), index=index)
# Mixed-type frame: the scalar entries ('A', 'B', 'E') are repeated on every
# row; the row labels 0..4 come from the 'C' Series.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20150214'),
    'C': pd.Series(1.6, index=list(range(5)), dtype='float64'),
    'D': np.full(5, 5, dtype='int64'),
    'E': 'hello pandas!',
})
print(df2)
# Three copies of the date column side by side, then renamed.
df = pd.concat([dates] * 3, axis=1)
df.columns = ['lu', 'zao', 'fa']
print(df)
# Column-wise concat of df2 and df. NOTE(review): df2 is labelled 0..4 and df
# is labelled 'a'..'e', so the rows do not line up - confirm this is intended.
df3 = pd.concat([df2, df], axis=1)
print(df3)
# Merging (to contrast merge with concat).
a1, a2 = dfa, dfa1
print(a1, "********************************", sep="\n")
print(a2, "********************************", sep="\n")
# dat1 = df[['secID', 'tradeDate', 'closePrice']]
# dat2 = df[['secID', 'tradeDate', 'turnoverVol']]
# Join the two frames on their shared 'fa' column.
dat = pd.merge(a1, a2, on=['fa'])
print(dat, "********************************", sep="\n")
# Data visualisation: a red line chart with star markers and weekday tick labels.
width = list(range(1, 6))
# height = a2.aq
height = [1, 3, 6, 2, 9]
group_labels = ['星期一', '星期二', '星期三', '星期四', '星期五']

plt.plot(width, height, marker="*", color="r")  # line chart
plt.title('卢造发 made it!', fontsize=20)
plt.xlabel("时 间", fontsize=15)
plt.ylabel("成绩", fontsize=15, rotation=40)
# Put the weekday labels at the x positions in `width`.
plt.xticks(width, group_labels, rotation=-40)
plt.tight_layout()
# savefig("F:/anaconda/work/downloads/picture/a1111000133333.jpg")  # save the figure to disk
plt.show()
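'''
Disabled example: export `h` to an Excel workbook.
NOTE: ExcelWriter.save() was removed in pandas 2.0; use writer.close() (or a
`with pd.ExcelWriter(...) as writer:` block) if this is re-enabled.

writer = pd.ExcelWriter('F:/anaconda/sales_sum.xlsx', engine='xlsxwriter')
# Store the workbook at the given path under this file name
h.to_excel(writer, 'zaofa', index=False) # name of the worksheet
writer.save() # write the file to disk
'''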