Bar plotting or hist plotting

In [74]: import numpy as np

             import matplotlib.pyplot as plt

In [75]: data = [5., 25., 50, 20]

In [79]: plt.bar( range(len(data)), data )

             plt.show()

https://i-blog.csdnimg.cn/blog_migrate/d282158e57caea9ea757505d1cbce696.png

In [80]: range(len(data))

Out[80]: range(0, 4)

In [81]: plt.barh( range(len(data)), data)

             plt.show()

https://i-blog.csdnimg.cn/blog_migrate/317492087e05a4a216e132408c5405a7.png

In [ ]:

In [82]: import matplotlib.pyplot as plt

In [83]: data = [5,25,50,20]

In [84]: plt.bar( range(len(data)), data, width=1)

             plt.show()

https://i-blog.csdnimg.cn/blog_migrate/6c92f7ae6f8d15d49acec00c51e861e3.png

In [87]: plt.barh( range(len(data)), data, height=1)

             plt.show()

https://i-blog.csdnimg.cn/blog_migrate/afc419cf112ba9c1641adb1946f3c385.png

In [89]:

In [ ]:

17

In [90]: import numpy as np

import matplotlib.pyplot as plt

In [96]: data = [ [5,25,50,20], [4,23,51,17], [6,22,52,19] ]

             XList = np.arange(4)

In [97]: w=0.25

             plt.bar(XList+w*0, data[0], color='b', width=w)  #w*0, w*1 and w*2 are the x offsets of the three series

             # the first blue bar is centered at x = 0.0

             plt.bar(XList+w*1, data[1], color='g', width=w)

             # width=w is the bar width; the offset between series is also w, so the bars of a group touch

             plt.bar(XList+w*2, data[2], color='r', width=w)  

             # the first red bar is centered at x = 0.5 (0 + 2*w)

             plt.show()

https://i-blog.csdnimg.cn/blog_migrate/c08141616277697f542310fb6d9eae50.png

In [ ]:

In [99]: import numpy as np

             import matplotlib.pyplot as plt

In [100]: data = [[5,25,50,20], [4,23,51,17], [6,22,52,19]]

              color_list=['b','g','r']

              gap = .8/len(data)   

In [101]: for i, row in enumerate(data): #enumerate yields each row together with its index i

                     X = np.arange(len(row))

                     plt.bar(X+i*gap, row, width=gap, color = color_list[ i%len(color_list) ])

                                                                               #i%len(color_list) cycles through the colors when data has more rows than color_list has entries

               plt.show()

https://i-blog.csdnimg.cn/blog_migrate/96bdff26dacb26a8784032c35b175048.png

In [ ]:

In [102]: import matplotlib.pyplot as plt

# In[106]:
# Two series of equal length that will be stacked on top of each other.
# (The original had both assignments on one line, which is a SyntaxError.)
A = [5, 30, 45, 22]
B = [5, 25, 50, 20]
XList = range(len(A))

# In[107]:
plt.bar(XList, A, color='b')
# bottom=A starts each red bar where the blue bar ends (default bar width: 0.8)
plt.bar(XList, B, color='r', bottom=A)
plt.show()

https://i-blog.csdnimg.cn/blog_migrate/bc7355148fd60fb0ffcb8cfb265fe146.png

In [ ]:

In [108]: import numpy as np

               import matplotlib.pyplot as plt

In [109]: A = np.array([5,30,45,22])

               B = np.array([5,20,50,20])

               C = np.array([1,2,1,1])

               xList=np.arange(4)

In [110]: plt.bar(xList, A, color='b')

               plt.bar(xList, B, color='y', bottom=A)

               plt.bar(xList, C, color='r', bottom=A+B)

               plt.show()

https://i-blog.csdnimg.cn/blog_migrate/38c67ffbeb636fd3caa12506ff85d453.png

In [ ]:

In [111]: import numpy as np

              import matplotlib.pyplot as plt

# In[112]:
# Each row of data is one series; the rows are stacked on top of each other.
data = np.array([[5, 30, 45, 22], [5, 20, 50, 20], [1, 2, 1, 1]])
colorList = ['b', 'y', 'r']
xList = np.arange(data.shape[1])  # data.shape[1] is the number of columns

for i in range(data.shape[0]):
    # The bottom of series i is the column-wise sum (axis=0) of all series
    # before it; for i == 0 the slice is empty and the sum is all zeros.
    # Index colorList (the palette defined in this cell), not color_list.
    plt.bar(xList, data[i], bottom=np.sum(data[:i], axis=0),
            color=colorList[i % len(colorList)])

plt.show()

https://i-blog.csdnimg.cn/blog_migrate/38c67ffbeb636fd3caa12506ff85d453.png

In [ ]:

In [113]: import numpy as np

              import matplotlib.pyplot as plt

In [115]: women_pop = np.array([5,30,45,22])

               men_pop = np.array([5,25,50,20])

               xList= np.arange(4)

In [116]: plt.barh(xList, women_pop, color='r')

              plt.barh(xList, -men_pop, color='b')

              plt.show()

https://i-blog.csdnimg.cn/blog_migrate/d57d91740f6a230cc15b5d630e16a193.png

In [ ]:

In [119]: import numpy as np

               import matplotlib.pyplot as plt

In [120]: xList = np.random.randn(1000)

In [121]: plt.hist(xList, bins=20)

               plt.show()

https://i-blog.csdnimg.cn/blog_migrate/314fe6e0bd29770879af4e0c1723ff9b.png

In [122]: plt.hist(xList, bins=50)

               plt.show()

https://i-blog.csdnimg.cn/blog_migrate/f3fbf860b5f37f39d6e32eaa81751899.png

In [ ]:

3 Using custom colors for bar charts

In [13]: import numpy as np

             import matplotlib.pyplot as plt

In [14]: women_pop = np.array([5.0, 30.,45., 22.])

             men_pop = np.array([5.0, 25., 50., 20.])

In [15]: X=np.arange(4) #0~3

# In[18]:
# A color given as a string holding a float in [0, 1] is a gray level
# ('0.25' is dark gray, '0.75' is light gray).
plt.barh(X, women_pop, color='0.25')
# Negating men_pop makes those bars extend to the left of zero.
# The edgecolor parameter is also available.
plt.barh(X, -men_pop, color='0.75')
plt.show()

Out[18]:

In [19]: import numpy as np

             import matplotlib.pyplot as plt

In [21]: values = np.random.randint(99, size=50) #generates 50 integers with 0 <= value <= 98 (the upper bound 99 is exclusive)

In [22]: values

Out[22]: array([ 8, 21, 30, 9, 74, 0, 91, 97, 81, 80, 21, 47, 18, 3, 81, 53, 22, 84, 50, 2, 33, 82, 93, 89, 51, 71, 87, 48, 0, 57, 15, 38, 66, 48, 75, 98, 46, 35, 33, 20, 28, 30, 20, 80, 83, 68, 29, 13, 38, 61])

 

In [25]: color_set = ('.00','.25','.50','.75')

## Difference between // and / in Python: "/" is floating-point division, "//" is integer (floor) division

             color_list=[ color_set[ (len(color_set) * val) //100 ] for val in values ]

             plt.bar(np.arange(len(values)), values, color=color_list)

             plt.show()

 

In [26]: (len(color_set) * 8) //100

Out[26]: 0

# In[28]:
color_set = ('.00', '.25', '.50', '.75')

# Sort once and use the same sequence for both the bar heights and the colors.
# The original colored by sorted(values) but plotted the unsorted values, so
# the gray level of a bar did not correspond to its height.
sorted_values = sorted(values)

# "/" is floating-point division and "//" is integer (floor) division, so
# (len(color_set) * val) // 100 maps a value in 0..99 to an index in 0..3.
color_list = [color_set[(len(color_set) * val) // 100] for val in sorted_values]

plt.bar(np.arange(len(sorted_values)), sorted_values, color=color_list)
plt.show()

7 Using colormaps for bar charts

In [11]: import numpy as np

             import matplotlib.cm as cm

             import matplotlib.colors as col

             import matplotlib.pyplot as plt

In [13]: values = np.random.randint(99, size=50) # 50 integers in 0..98 (the upper bound 99 is exclusive)

In [14]:                               #normalize data into the [0.0, 1.0] interval

             cmap = cm.ScalarMappable(col.Normalize(0,99), cm.binary)

In [16]:                               #converts the list of values to a list of color

            plt.bar(np.arange(len(values)), values, color = cmap.to_rgba(values))

            plt.show()

# # we use the linestyle parameter of pyplot.plot() to control the line
# pattern of three different curves. The following line styles are available:
#  ### Solid
#  ### Dashed
#  ### Dotted
#  ### Dashdot

The line style with other plot types

# In[24]:
import numpy as np
import matplotlib.pyplot as plt


# In[25]:
N=8
A=np.random.random(N)
B=np.random.random(N)
X=np.arange(N)


# In[60]:


plt.bar(X, A, color='0.75')
# White bars of height A+B drawn starting at bottom=A, outlined with a dashed
# yellow edge: linestyle, linewidth and edgecolor style the bar outline.
# (The original call was missing its closing parenthesis.)
b = plt.bar(X, A+B, bottom=A, color='w', linestyle='dashed', linewidth=1, edgecolor='y')
plt.show()

Controlling a fill pattern
 hatch pattern
#  /
#  \
#  |
#  -
#  +
#  x
#  o
#  O
#  .
#  *

The edgecolor parameter controls the color of the hatching.

# In[70]:
import numpy as np
import matplotlib.pyplot as plt


# In[72]:
N = 8
A = np.random.random(N)
B = np.random.random(N)
X = np.arange(N)


# In[77]:
# The edgecolor parameter controls the color of the hatching.
plt.bar(X, A, color='w', hatch='x', edgecolor='k')
# Single-letter color codes must be lowercase: 'W' raises an
# "Invalid RGBA argument" error on current matplotlib, so use 'w'.
plt.bar(X, A+B, bottom=A, color='w', hatch='/', edgecolor='k')

plt.show()

 

Bar Plots

The plot.bar() and plot.barh() methods make vertical and horizontal bar plots, respectively.

 

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

 

fig, axes = plt.subplots(2,1) #row=2  column=1

data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop')) #rand(): [0,1)

 

data.plot.bar(ax=axes[0], color='k', alpha=0.7, rot=0)

data.plot.barh(ax=axes[1],color='b', alpha=0.7)

 

plt.show()

 

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

 

df = pd.DataFrame(np.random.rand(6,4),

                  index=['one', 'two', 'three', 'four', 'five', 'six'],

                  columns=pd.Index(['A','B','C','D'], name='Genus'))

df

df.plot.barh(stacked=True,alpha=0.5,rot=0)

plt.legend(loc='upper right',title='Genus')

plt.show()

A useful recipe for bar plots is to visualize a Series’s value frequency using value_counts: s.value_counts().plot.bar().

#######################################################

tips.csv

#######################################################

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

 

tips = pd.read_csv('../examples/tips.csv')

tips.head()

                                     #rows (index)  #columns

party_counts = pd.crosstab(tips['day'], tips['size'])

party_counts

party_counts = party_counts.loc[:,2:5]  #keep the columns labeled 2 through 5 (label slices include both ends)

party_counts

                        # Normalize each row so that it sums to 1

party_pcts = party_counts.div(party_counts.sum(1), axis=0)

#each row (one day) is divided by its own row total, e.g. 16/(16+1+1+0) = 0.888889

party_pcts

party_pcts.plot.bar(rot=90)

plt.show()

Conclusion:

So you can see that party sizes appear to increase on the weekend in this dataset.

 

seaborn

import seaborn as sns

import matplotlib.pyplot as plt

 

tips = pd.read_csv('../examples/tips.csv')

tips.head()

 

#0.063204       = 1.01        / (16.99              - 1.01       )

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])

tips.head()

sns.barplot(data=tips, x='tip_pct', y='day', orient='h')

sns.set(style=None)

plt.show() #The black lines drawn on the bars represent the 95% confidence interval

sns.barplot(data=tips, x='tip_pct', y='day', orient='h', hue='time')

sns.set(style='whitegrid')

plt.legend(loc='center right', title='time')

plt.show()

help(sns.set)

help(sns.axes_style)

 

Histograms and Density Plots

A histogram is a kind of bar plot that gives a discretized display of value frequency. The data points are split into discrete, evenly spaced bins, and the number of data points in each bin is plotted.

import seaborn as sns

import matplotlib.pyplot as plt

 

tips = pd.read_csv('../examples/tips.csv')

tips.head()

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])   #sorted then split the data points (depend on their values)

tips.head()

tips['tip_pct'].plot.hist(bins=50) #sorted tips['tip_pct'] then split the data points (depend on their values) to 50 bins

plt.title('Histogram of tip percentages')

plt.show()

A related plot type is a density plot, which is formed by computing an estimate of a

continuous probability distribution that might have generated the observed data.

 

Density plots are also known as kernel density estimate (KDE) plots.

Using plot.kde (plot.density is an alias) makes a density plot using the conventional mixture-of-normals estimate.

 

tips['tip_pct'].plot.density()

plt.title('Density plot of tip percentages')

plt.show()

高斯分布(Gaussian Distribution)的概率密度函数(probability density function):

$$f(x) = \frac{1}{\sigma\sqrt{2\pi}}\, e^{-\frac{(x-\mu)^2}{2\sigma^2}}$$

np.random.randn(size) 从标准正态分布(μ=0,σ=1)中采样,对应于 np.random.normal(loc=0, scale=1, size=size)

                                            #normal distribution mu=0, sigma=1=std.dev

Seaborn makes histograms and density plots even easier through its distplot

method, which can plot both a histogram and a continuous density estimate simultaneously.

 

import seaborn as sns

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

 

                      #first component: mean mu=0, standard deviation sigma=1, 200 samples

comp1 = np.random.normal(0,1,size=200)

                      #second component: mean mu=10, standard deviation sigma=2, 200 samples

comp2 = np.random.normal(10,2, size=200)

values= pd.Series(np.concatenate([comp1, comp2]))

# NOTE(review): distplot is deprecated in recent seaborn releases; there the documented replacement is
# sns.histplot(values, bins=100, color='k', kde=True, stat='density') - confirm against the installed version

sns.distplot(values, bins=100, color='k')

 

plt.title('Normalized histogram of normal mixture with density estimate')

plt.show()

Figures and Subplots

Plots in matplotlib reside within a Figure object.

import matplotlib.pyplot as plt
# randn is called below; without this import the snippet raises NameError
# when the notebook is run top to bottom.
from numpy.random import randn

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Three random walks. Each line's label feeds the legend; pass no label or
# label='_nolegend_' to leave a line out of the legend.
ax.plot(randn(1000).cumsum(), color='k', label='one')
ax.plot(randn(1000).cumsum(), color='k', linestyle='--', label='two')
ax.plot(randn(1000).cumsum(), color='k', linestyle='dotted', label='three')

# Place ticks at fixed x positions, then replace their numeric labels with text.
ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'], rotation=30, fontsize='small')

# The title and x label can be set one at a time ...
ax.set_title('My first matplotlib plot')
ax.set_xlabel('Stages')

# ... or together in a single ax.set() call; both forms set the same properties.
props = {
    'title': 'My first matplotlib plot',
    'xlabel': 'Stages'
}
ax.set(**props)

ax.legend(loc='best')
plt.show()

matplotlib draws on the last figure and subplot used (creating one if necessary), thus hiding the figure and subplot creation.

plt.plot(np.random.randn(50).cumsum(), color='black', ls='--')

 

matplotlib includes a convenience method, plt.subplots, that creates a new figure and returns a NumPy array containing the created subplot objects. The axes array can be indexed like a two-dimensional array; for example, axes[0, 1].

Adjusting the spacing around subplots

plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

 

# 1

fig, axes = plt.subplots(2,2, sharex=True, sharey= True)

for i in range(2):

    for j in range(2):

        axes[i,j].hist(np.random.randn(500), bins =5, color='k', alpha=0.5)

plt.subplots_adjust(wspace=0.05, hspace=0.05)

# 2

from numpy.random import randn

arr=randn(30)

arrCumSum=arr.cumsum()

plt.plot(arrCumSum, color='k', linestyle='dashed', drawstyle='steps-post', label='steps-post', marker='o')

plt.legend(loc='best')     #label='steps-post'

plt.show()

Annotations and Drawing on a Subplot

 

import numpy as np

import pandas as pd

from datetime import datetime

                                       #index_col : int or sequence or False, default None

data = pd.read_csv('../examples/spx.csv',parse_dates=True, index_col=0)

spx = data['SPX']  #'SPX' column

 

crisis_data=[

    (datetime(2007, 10, 11), 'Peak of bull market'),  #tuple

    (datetime(2008,  3, 12), 'Bear Stearns Fails'),

    (datetime(2008,  9, 15), 'Lehman Bankruptcy')

]

# // matplotlib Configuration

plt.rc('figure', figsize=(10,10))

font_options={

    'family': 'monospace',

    'weight': 'bold',

    'size': 16

}

plt.rc('font', **font_options)

 

fig = plt.figure()

ax = fig.add_subplot(1,1,1)

 

spx.plot(ax=ax, color='green', linestyle='-')

 

for date, label in crisis_data:

    ax.annotate(  label,

                ha='left',

                va='top',

                xytext=(date, spx.asof(date) + 225), #The xytext parameter specifies the text position.

                xy=(date, spx.asof(date) + 75),     #The xy parameter specifies the arrow's destination         

                arrowprops=dict(facecolor='blue', headwidth=10, headlength=4, width=2 ),

                #arrowprops={'facecolor':'blue', 'headwidth':10, 'headlength':4, 'width':2}

               )

#Zoom in on 2007-2010

ax.set_xlim(['1/1/2007', '1/1/2011'])

ax.set_ylim([600,1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

 

plt.show()

Adding arrows

The aspect of the arrow is controlled by a dictionary passed to the arrowprops parameter:

- 'arrowstyle': the values '<-', '<', '-', 'wedge', 'simple', and 'fancy' control the style of the arrow.
- 'facecolor': the color used for the arrow; it is used to set both the background and the edge color.
- 'edgecolor': the color used for the edges of the arrow's shape.
- 'alpha': the transparency level, so that the arrow blends with the background.

The shrink parameter controls the gap between the arrow's endpoints and the arrow itself.

Facet Grids (分面网格) and Categorical Data (分类数据)

import seaborn as sns

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

tips = pd.read_csv('../examples/tips.csv')

tips.head()

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])

tips.head()

                                         #categorical data                        

# sns.factorplot was renamed to sns.catplot and the old name is no longer
# available in current seaborn. Every call here passes kind= explicitly, so
# catplot draws the same plots.

# One panel per smoker value, bars grouped by day and colored by time.
# Rows with tip_pct >= 1 are filtered out before plotting.
sns.catplot(x='day', y='tip_pct', hue='time', col='smoker', kind='bar', data=tips[tips.tip_pct < 1])
plt.show()

# Same data, but faceted by time (rows) and smoker (columns) instead of using hue.
sns.catplot(x='day', y='tip_pct', row='time', col='smoker', kind='bar', data=tips[tips.tip_pct < 1])
plt.show()

# Box plots of tip_pct for each day, restricted to tip_pct < 0.5.
sns.catplot(x='tip_pct', y='day', kind='box', data=tips[tips.tip_pct < 0.5])
plt.show()

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值