python数据分析--matplotlib基础

最新推荐文章于 2023-11-27 15:00:53 发布

SunChao3555

最新推荐文章于 2023-11-27 15:00:53 发布

阅读量379

点赞数

分类专栏： Python 文章标签： matplotlib

本文链接：https://blog.csdn.net/SunChao3555/article/details/81288818

版权

Python 专栏收录该内容

41 篇文章 1 订阅

订阅专栏

#coding:utf-8
# Standard library.
import json
import os
import re

# Third-party packages.
import lxml
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab
import scrapy
import xlwt
from pandas import DataFrame, Series
from scipy import linalg

# Seed NumPy's global random generator so the random demo data below is
# the same on every run.
np.random.seed(12345)

# Default size applied to every new matplotlib figure.
plt.rc('figure', figsize=(10, 6))

# Print NumPy floats with 4 digits of precision.
np.set_printoptions(precision=4)

'''
#Beyond柱状图：可视化的理论介绍
    #如何使用python进行初步的可视化工作
    #Coding实战
#知道画什么，比知道怎么画更重要
#Tableau：最牛的可视化工具
#KISS Keep It Simple Stupid
#可视化设计原则
    位置，大小，纹理，颜色，方向，形状
    文本可视化
    时序数据可视化（time）
    高维数据可视化 平行坐标
    机器学习相关可视化
    图可视化
    科学可视化
    交互（D3.js，见 d3js.org）->[ECharts]
    情感可视化
'''

# alpha: transparency (opacity) of a plotted artist.

# Disabled demo (kept inside a bare string literal, so it never executes):
# figures and subplots, shared axes, line styles, legends, tick labels and
# titles.
#
# Why it is laid out this way:
#   * Each figure is displayed with pylab.show() BEFORE plt.close('all');
#     closing first would discard the figure without ever drawing it.
#   * The all-None plt.subplots_adjust(...) call is kept only as a commented
#     signature reference: called right after plt.close('all') it changes
#     no spacing and implicitly creates an extra blank figure.
'''
#figure,subplot
fig=plt.figure()
ax1=fig.add_subplot(2,2,1)
ax2=fig.add_subplot(2,2,2)
ax3=fig.add_subplot(2,2,3)
plt.plot(np.random.randn(50).cumsum(),'k--')
_=ax1.hist(np.random.randn(100),bins=20,color='r',alpha=0.75) #将向量x中数据（100个随机数1）等分为20组,alpha为颜色深度的系数
ax2.scatter(np.arange(30),np.arange(30)+3*np.random.randn(30))#散点图(x,y)
# print(np.arange(30))
pylab.show()
plt.close('all')
#调整subplot间距
#plt.subplots_adjust(left=None,bottom=None,right=None,top=None,wspace=None,hspace=None)
fig,axes=plt.subplots(2,2,sharex=True,sharey=True)
for i in range(2):
    for j in range(2):
        axes[i,j].hist(np.random.randn(500),bins=50,color='k',alpha=0.5)
plt.subplots_adjust(wspace=0,hspace=0)
plt.figure()
plt.plot(np.random.randn(30).cumsum(),'ko--')


pylab.show()
plt.close('all')

data=np.random.randn(30).cumsum()
plt.plot(data,'k--',label='Default')
plt.plot(data,'k-',drawstyle='steps-post',label='steps')
plt.legend(loc='best')#将多个.plot()放到一个subplot中显示
pylab.show()
plt.close('all')

fig2=plt.figure()
ax=fig2.add_subplot(1,1,1)
ax.plot(np.random.randn(1000).cumsum())
ticks=ax.set_xticks([0,250,500,750,1000])
labels=ax.set_xticklabels(['one','two','three','four','five'],rotation=30,fontsize='small')
ax.set_title('some random lines')
ax.set_xlabel('stage')
pylab.show()
plt.close('all')
'''

# Disabled demo (kept inside a bare string literal, so it never executes):
# annotating points on a subplot.
# It reads 'apple.csv' with the first column as a parsed date index, plots
# the 'close' column as a black line, and draws one arrow annotation per
# (date, label) pair in crisis_data. Each arrow points 10 units above the
# value of the series as of that date, with the label text 30 units above.
# Finally it fixes the x/y axis limits, sets the title and shows the figure.
# NOTE(review): the labels look like 2008 financial-crisis events while the
# dates are in 2017-2018 -- presumably placeholder text; confirm.
'''
#subplot做标记
from datetime import datetime
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
data=pd.read_csv('apple.csv',index_col=0,parse_dates=True)
spx=data['close']
spx.plot(ax=ax,style='k-')
crisis_data=[
    (datetime(2017,10,11),'Peak of bull market'),
    (datetime(2018,3,12),'Bear Stearns Fails'),
    (datetime(2018,7,15),'Lehman Bankruptcy'),
]

for date,label in crisis_data:
    # print(spx.asof(date))
    ax.annotate(label,xy=(date,spx.asof(date)+10),xytext=(date,spx.asof(date)+30),arrowprops=dict(facecolor='black'),horizontalalignment='left',verticalalignment='top')#对于crisis_data数组中的三个日期点分别标记，名称，xy位置，xy标记名称位置
ax.set_xlim(['7/25/2017','7/25/2018'])#设置x轴范围
ax.set_ylim([100,250])#设置y轴范围
ax.set_title('apple in 2018')


pylab.show()
plt.close('all')


'''


# Disabled demo (kept inside a bare string literal, so it never executes):
# drawing geometric patches (rectangle, circle, polygon) and saving the
# figure to SVG, to PNG and to an in-memory bytes buffer.
#
# All saves happen BEFORE pylab.show() and go through fig.savefig(): once a
# blocking show() returns, pyplot no longer tracks the displayed figure, so
# a later plt.savefig() would write a brand-new empty figure instead.
'''
#画几何图形

fig=plt.figure()
ax=fig.add_subplot(1,1,1)
rect=plt.Rectangle((0.2,0.75),0.4,0.15,color='k',alpha=0.3)
cir=plt.Circle((0.7,0.2),0.15,color='b',alpha=0.3)
pgon=plt.Polygon([[0.15,0.15],[0.35,0.4],[0.2,0.6]],color='g',alpha=0.5)#多边形

ax.add_patch(rect)
ax.add_patch(cir)
ax.add_patch(pgon)

#存储
fig.savefig('fig1.svg')
fig.savefig('fig1.png',dpi=400,bbox_inches='tight')
from io import BytesIO
buffer=BytesIO()
fig.savefig(buffer)
plot_data=buffer.getvalue()

pylab.show()

'''
# Disabled demo (kept inside a bare string literal, so it never executes):
# line plots of pandas objects.
# It builds a Series of 10 cumulative random values indexed 0, 10, ..., 90
# and a 10x4 DataFrame (columns A-D) of column-wise cumulative random values
# on the same index, plots the Series and then each DataFrame column as its
# own labelled, semi-transparent line, adds a legend and shows the figure.
'''
#Plotting functions in pandas
#Line plots

s=Series(np.random.randn(10).cumsum(),index=np.arange(0,100,10))
# s.plot()
plt.plot(s,label='Series')
columns=['A','B','C','D']
df=DataFrame(np.random.randn(10,4).cumsum(0),columns=columns,index=np.arange(0,100,10))
# df.plot()
# plt.plot(df)
for i in columns:
    plt.plot(df[i],label=i,alpha=0.3)
plt.legend(loc='best')

pylab.show()
pylab.close('all')
'''
# Disabled demo (kept inside a bare string literal, so it never executes):
# vertical and horizontal bar charts for a Series, then grouped and stacked
# bar charts for a DataFrame.
#
# DataFrame.plot() without ax= opens its own figure, so no plt.figure() is
# issued before it (that would only leave an empty extra window). The demo
# ends with pylab.show() so the figures are actually displayed.
'''
#条形图
pylab.close('all')
fig,axes=plt.subplots(2,1)
data=Series(np.random.rand(16),index=list('abcdefghijklmnop'))
data.plot(kind='bar',ax=axes[0],color='k',alpha=0.7)
data.plot(kind='barh',ax=axes[1],color='k',alpha=0.7)
#rand(): uniform random samples in [0, 1)

df=DataFrame(np.random.rand(6,4),index=['one', 'two', 'three', 'four', 'five', 'six'],columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
df.plot(kind='bar')
df.plot(kind='barh', stacked=True, alpha=0.5)#stack堆叠显示
pylab.show()
'''

# Disabled demo (kept inside a bare string literal, so it never executes):
# cross-tabulation of the tips data set, then histogram and kernel density
# estimate (KDE) plots.
#
# API notes:
#   * tips['size'] is used instead of tips.size: attribute access returns
#     DataFrame.size (the total element count), not the column.
#     Assumes tips.csv has 'day' and 'size' columns -- TODO confirm.
#   * .loc replaces the .ix indexer, which has been removed from pandas;
#     the slice 2:5 selects the column labels 2 through 5 inclusive.
#   * density=True replaces the removed normed=True histogram keyword.
'''
tips=pd.read_csv('tips.csv')
plt.figure()
party_counts=pd.crosstab(tips['day'],tips['size'])
print(party_counts)
party_counts=party_counts.loc[:,2:5]
print(party_counts)
tips['tip_pct']=tips['tip']/tips['total_bill']
tips['tip_pct'].hist(bins=50)
plt.figure()
tips['tip_pct'].plot(kind='kde')#机器学习或数据分析常用的一种方式：核密度估计
plt.figure()
comp1=np.random.normal(0,1,size=200)#N(0,1)
print(comp1)
comp2=np.random.normal(10,2,size=200)#N(10,4)
val=Series(np.concatenate([comp1,comp2]))
val.hist(bins=100,alpha=0.3,color='k',density=True)
val.plot(kind='kde',style='k--')

pylab.show()
'''


# Disabled demo (kept inside a bare string literal, so it never executes):
# a scatter plot of two log-differenced macro series, then a scatter matrix
# of all four selected columns with KDE plots on the diagonal.
#
# scatter_matrix() creates its own figure when no ax= is given, so no
# plt.figure() is issued before it (that would only leave an empty extra
# window).
'''
#Scatter plot
macro=pd.read_csv('macrodata.csv')
data=macro[['cpi','m1', 'tbilrate', 'unemp']]
trans_data=np.log(data).diff().dropna()#取对数（e）,然后取前后数的差
td=trans_data[-5:]
print(td)
plt.figure()
plt.scatter(trans_data['m1'],trans_data['unemp'])
plt.title('Changes in log %s vs. %s'%('m1','unemp'))
import pandas.plotting
pd.plotting.scatter_matrix(trans_data,diagonal='kde',color='k',alpha=0.3)#绘制出不同列属性之间俩俩关系的散点图，(diagonal)对角线为列属性自身的分布估计(核密度估计)

plt.show()
'''
# Plotting Maps:base map