Bar plotting or hist plotting_figures and subplots-CSDN博客

本文链接：https://blog.csdn.net/Linli522362242/article/details/87891370

In [74]: import numpy as np

import matplotlib.pyplot as plt

In [75]: data = [5., 25., 50, 20]

In [79]: plt.bar( range(len(data)), data )

plt.show()

In [80]: range(len(data))

Out[80]: range(0, 4)

In [81]: plt.barh( range(len(data)), data)

plt.show()

In [ ]:

In [82]: import matplotlib.pyplot as plt

In [83]: data = [5,25,50,20]

In [84]: plt.bar( range(len(data)), data, width=1)

plt.show()

In [87]: plt.barh( range(len(data)), data, height=1)

plt.show()

In [89]:

In [ ]:

In [90]: import numpy as np

import matplotlib.pyplot as plt

In [96]: data = [ [5,25,50,20], [4,23,51,17], [6,22,52,19] ]

XList = np.arange(4)

In [97]: w=0.25

plt.bar(XList+w*0, data[0], color='b', width=w) #w*0 or w*1 or w*2 做偏移

# center the first blue bar to 0.0

plt.bar(XList+w*1, data[1], color='g', width=w)

#width=w is the gap

plt.bar(XList+w*2, data[2], color='r', width=w)

# center the first red bar to 0.5

plt.show()

In [ ]:

In [99]: import numpy as np

import matplotlib.pyplot as plt

# In[100]:
# One inner list per data series; each series is drawn as one colored set of bars.
data = [[5, 25, 50, 20], [4, 23, 51, 17], [6, 22, 52, 19]]

color_list = ['b', 'g', 'r']

# Divide a total group width of 0.8 evenly among the series.
gap = .8 / len(data)

# In[101]:
for i, row in enumerate(data):  # enumerate yields both the index i and the current row
    X = np.arange(len(row))
    # Shift series i to the right by i*gap so the bars of one group sit side by side.
    # i % len(color_list) cycles through the colors if there are more series than colors.
    plt.bar(X + i * gap, row, width=gap, color=color_list[i % len(color_list)])

plt.show()

In [ ]:

In [102]: import matplotlib.pyplot as plt

# In[106]:
# Two series of equal length; B is later stacked on top of A.
A = [5, 30, 45, 22]

B = [5, 25, 50, 20]

# One x position per value in A.
XList = range(len(A))

In [107]: plt.bar(XList, A, color='b')

plt.bar(XList, B, color='r', bottom=A) #default width: 0.8

plt.show()

In [ ]:

In [108]: import numpy as np

import matplotlib.pyplot as plt

In [109]: A = np.array([5,30,45,22])

B = np.array([5,20,50,20])

C = np.array([1,2,1,1])

xList=np.arange(4)

In [110]: plt.bar(xList, A, color='b')

plt.bar(xList, B, color='y', bottom=A)

plt.bar(xList, C, color='r', bottom=A+B)

plt.show()

In [ ]:

In [111]: import numpy as np

import matplotlib.pyplot as plt

# In[112]:
# Each row of data is one layer of the stacked bar chart.
data = np.array([[5, 30, 45, 22], [5, 20, 50, 20], [1, 2, 1, 1]])

colorList = ['b', 'y', 'r']

xList = np.arange(data.shape[1])  # data.shape[1] is the number of columns

for i in range(data.shape[0]):  # one iteration per row (layer)
    # Summing rows 0..i-1 along axis=0 (column-wise) gives the baseline on which
    # layer i is stacked; for i == 0 the sum over an empty slice is all zeros.
    plt.bar(xList, data[i], bottom=np.sum(data[:i], axis=0), color=colorList[i % len(colorList)])

plt.show()

In [ ]:

In [113]: import numpy as np

import matplotlib.pyplot as plt

In [115]: women_pop = np.array([5,30,45,22])

men_pop = np.array([5,25,50,20])

xList= np.arange(4)

In [116]: plt.barh(xList, women_pop, color='r')

plt.barh(xList, -men_pop, color='b')

plt.show()

In [ ]:

In [119]: import numpy as np

import matplotlib.pyplot as plt

In [120]: xList = np.random.randn(1000)

In [121]: plt.hist(xList, bins=20)

plt.show()

In [122]: plt.hist(xList, bins=50)

plt.show()

In [ ]:

3 Using custom colors for bar charts

In [13]: import numpy as np

import matplotlib.pyplot as plt

In [14]: women_pop = np.array([5.0, 30.,45., 22.])

men_pop = np.array([5.0, 25., 50., 20.])

In [15]: X=np.arange(4) #0~3

# In[18]:
# A color given as a string such as '0.25' is a gray level between 0 (black) and 1 (white).
plt.barh(X, women_pop, color='0.25')

# Negating men_pop draws those bars to the left of zero, back to back with women_pop.
plt.barh(X, -men_pop, color='0.75')  # The edgecolor parameter is also available

plt.show()

Out[18]:

In [19]: import numpy as np

import matplotlib.pyplot as plt

In [21]: values = np.random.randint(99, size=50) # 50 random integers with 0 <= value <= 98 (the upper bound 99 is exclusive)

In [22]: values

Out[22]: array([ 8, 21, 30, 9, 74, 0, 91, 97, 81, 80, 21, 47, 18, 3, 81, 53, 22, 84, 50, 2, 33, 82, 93, 89, 51, 71, 87, 48, 0, 57, 15, 38, 66, 48, 75, 98, 46, 35, 33, 20, 28, 30, 20, 80, 83, 68, 29, 13, 38, 61])

In [25]: color_set = ('.00','.25','.50','.75')

##Python中的 // 与 / 的区别, " / " 表示浮点数除法

color_list=[ color_set[ (len(color_set) * val) //100 ] for val in values ]

plt.bar(np.arange(len(values)), values, color=color_list)

plt.show()

In [26]: (len(color_set) * 8) //100

Out[26]: 0

# In[28]:
color_set = ('.00', '.25', '.50', '.75')

# In Python, "/" is floating-point division and "//" is integer (floor) division.

# Sort once and use the same sorted sequence for both the bar heights and the
# colors, so that each bar's gray level corresponds to its own value.
sorted_values = sorted(values)

# Map each value to one of the len(color_set) gray levels by its position in 0..99.
color_list = [color_set[(len(color_set) * val) // 100] for val in sorted_values]

plt.bar(np.arange(len(sorted_values)), sorted_values, color=color_list)

plt.show()

7 Using colormaps for bar charts

In [11]: import numpy as np

import matplotlib.cm as cm

import matplotlib.colors as col

import matplotlib.pyplot as plt

In [13]: values = np.random.randint(99, size=50) # 50 random integers in 0~98 (the upper bound 99 is exclusive)

In [14]: #normalize data into the [0.0, 1.0] interval

cmap = cm.ScalarMappable(col.Normalize(0,99), cm.binary)

In [16]: #converts the list of values to a list of color

plt.bar(np.arange(len(values)), values, color = cmap.to_rgba(values))

plt.show()

# # we use the linestyle parameter of pyplot.plot() to control the line
# pattern of three different curves. The following line styles are available:
# ### Solid
# ### Dashed
# ### Dotted
# ### Dashdot

The line style with other plot types

# In[24]:
import numpy as np
import matplotlib.pyplot as plt

# In[25]:
N=8
A=np.random.random(N)
B=np.random.random(N)
X=np.arange(N)

# In[60]:

plt.bar(X, A, color='0.75')
##edgecolor='y'
b=plt.bar(X, A+B, bottom=A, color='w', linestyle='dashed', linewidth=1,edgecolor='y')
plt.show()

Controlling a fill pattern
hatch pattern
# /
# \
# |
# -
# +
# x
# o
# O
# .
# *

edgecolor parameter will control the color of the hatching.

# In[70]:
import numpy as np
import matplotlib.pyplot as plt

# In[72]:
N = 8
A = np.random.random(N)
B = np.random.random(N)
X = np.arange(N)

# In[77]:
# The edgecolor parameter controls the color of the hatching.
plt.bar(X, A, color='w', hatch='x', edgecolor='k')
# Single-letter color shorthands must be lowercase ('w', not 'W').
plt.bar(X, A+B, bottom=A, color='w', hatch='/', edgecolor='k')

plt.show()

Bar Plots¶

The plot.bar() and plot.barh() methods make vertical and horizontal bar plots, respectively.

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

fig, axes = plt.subplots(2,1) #row=2 column=1

data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop')) #rand(): [0,1)

data.plot.bar(ax=axes[0], color='k', alpha=0.7, rot=0)

data.plot.barh(ax=axes[1],color='b', alpha=0.7)

plt.show()

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

df = pd.DataFrame(np.random.rand(6,4),

index=['one', 'two', 'three', 'four', 'five', 'six'],

columns=pd.Index(['A','B','C','D'], name='Genus'))

df.plot.barh(stacked=True,alpha=0.5,rot=0)

plt.legend(loc='upper right',title='Genus')

plt.show()

A useful recipe for bar plots is to visualize a Series’s value frequency using value_counts: s.value_counts().plot.bar().

#######################################################

tips.csv

#######################################################

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

tips = pd.read_csv('../examples/tips.csv')

tips.head()

#axis[0] #axis[1]

party_counts = pd.crosstab(tips['day'], tips['size'])

party_counts

party_counts = party_counts.loc[:,2:5] #label[2,3,4,5]

party_counts

# Normalize to sum to 1

party_pcts = party_counts.div(party_counts.sum(1), axis=0)

# Each row (day) is divided by its own row total (axis=0 aligns the sums on the index), e.g. 16/(16+1+1+0) = 0.888889

party_pcts

party_pcts.plot.bar(rot=90)

plt.show()

Conclusion:

So you can see that party sizes appear to increase on the weekend in this dataset.

seaborn

import seaborn as sns

import matplotlib.pyplot as plt

tips = pd.read_csv('../examples/tips.csv')

tips.head()

#0.063204 = 1.01 / (16.99 - 1.01 )

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])

tips.head()

sns.barplot(data=tips, x='tip_pct', y='day', orient='h')

sns.set(style=None)

plt.show() #The black lines drawn on the bars represent the 95% confidence interval

sns.barplot(data=tips, x='tip_pct', y='day', orient='h', hue='time')

sns.set(style='whitegrid')

plt.legend(loc='center right', title='time')

plt.show()

help(sns.set)

help(sns.axes_style)

Histograms and Density Plots¶

A histogram is a kind of bar plot that gives a discretized display of value frequency. The data points are split into discrete, evenly spaced bins, and the number of data points in each bin is plotted.

import seaborn as sns

import matplotlib.pyplot as plt

tips = pd.read_csv('../examples/tips.csv')

tips.head()

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip']) #sorted then split the data points (depend on their values)

tips.head()

tips['tip_pct'].plot.hist(bins=50) #sorted tips['tip_pct'] then split the data points (depend on their values) to 50 bins

plt.title('Histogram of tip percentages')

plt.show()

A related plot type is a density plot, which is formed by computing an estimate of a continuous probability distribution that might have generated the observed data.

Density plots are also known as kernel density estimate (KDE) plots.

Using plot.kde (or its alias plot.density, which is used below) makes a density plot using the conventional mixture-of-normals estimate.

tips['tip_pct'].plot.density()

plt.title('Density plot of tip percentages')

plt.show()

高斯分布（Gaussian Distribution）的概率密度函数（probability density function）： $f(x) = \frac{1}{\sigma\sqrt{2\pi}}\, e^{-\frac{(x-\mu)^2}{2\sigma^2}}$

np.random.randn(size)所谓标准正态分布（μ=0,σ=1），对应于np.random.normal(loc=0, scale=1, size)

#normal distribution mu=0, sigma=1=std.dev

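Seaborn makes histograms and density plots even easier through its distplot function, which can plot both a histogram and a continuous density estimate simultaneously. (Note: distplot has been deprecated since seaborn 0.11; histplot(..., kde=True) and displot are its replacements.)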

import seaborn as sns

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

#mu=0 sigma=1=std.dev sampling size=200

comp1 = np.random.normal(0,1,size=200)

#mu=10, sigma=2=std.dev

comp2 = np.random.normal(10,2, size=200)

values= pd.Series(np.concatenate([comp1, comp2]))

sns.distplot(values, bins=100, color='k')

plt.title('Normalized histogram of normal mixture with density estimate')

plt.show()

Figures and Subplots

Plots in matplotlib reside within a Figure object.

import matplotlib.pyplot as plt
from numpy.random import randn  # randn is called below, so it must be imported in this cell

fig = plt.figure()

ax = fig.add_subplot(1, 1, 1)

# Each line is a random walk (cumulative sum of 1000 standard-normal draws).
# The label is what the legend shows; to leave a line out of the legend,
# pass no label or label='_nolegend_'.
ax.plot(randn(1000).cumsum(), color='k', label='one')

ax.plot(randn(1000).cumsum(), color='k', linestyle='--', label='two')

ax.plot(randn(1000).cumsum(), color='k', linestyle='dotted', label='three')

# Place ticks at these x positions and replace their numeric labels with text.
ticks = ax.set_xticks([0, 250, 500, 750, 1000])

labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'], rotation=30, fontsize='small')

# Option 1: set the title and x label with individual setter calls.
ax.set_title('My first matplotlib plot')

ax.set_xlabel('Stages')

# Option 2: set the same properties in one batch through ax.set(**props).
props = {
    'title': 'My first matplotlib plot',
    'xlabel': 'Stages'
}

ax.set(**props)

ax.legend(loc='best')

plt.show()

When you call a pyplot function such as plt.plot, matplotlib draws on the last figure and subplot used (creating one if necessary), thus hiding the figure and subplot creation.

plt.plot(np.random.randn(50).cumsum(), color='black', ls='--')

matplotlib includes a convenience method, plt.subplots, that creates a new figure and returns a NumPy array containing the created subplot objects. The axes array can be easily indexed like a two-dimensional array; for example, axes[0, 1].

Adjusting the spacing around subplots

plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

# 1

# Create a 2x2 grid of subplots that share both the x and the y axis.
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)

# Fill every cell of the grid with a histogram of 500 standard-normal samples.
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=5, color='k', alpha=0.5)

# Set the horizontal (wspace) and vertical (hspace) spacing between the subplots.
plt.subplots_adjust(wspace=0.05, hspace=0.05)

# 2

from numpy.random import randn

arr=randn(30)

arrCumSum=arr.cumsum()

plt.plot(arrCumSum, color='k', linestyle='dashed', drawstyle='steps-post', label='steps-post', marker='o')

plt.legend(loc='best') #label='steps-post'

plt.show()

Annotations and Drawing on a Subplot

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt  # plt.rc, plt.figure and plt.show are used below

from datetime import datetime

# index_col=0 uses the first CSV column as the index;
# parse_dates=True asks pandas to parse that index as dates.
data = pd.read_csv('../examples/spx.csv', parse_dates=True, index_col=0)

spx = data['SPX']  # the 'SPX' column

# (date, label) tuples for the events to annotate on the plot.
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

# matplotlib configuration
plt.rc('figure', figsize=(10, 10))

font_options = {
    'family': 'monospace',
    'weight': 'bold',
    'size': 16
}

plt.rc('font', **font_options)

fig = plt.figure()

ax = fig.add_subplot(1, 1, 1)

spx.plot(ax=ax, color='green', linestyle='-')

for date, label in crisis_data:
    # spx.asof(date) looks up the series value at (or just before) the event date,
    # so the arrow and its text are placed relative to the curve.
    ax.annotate(
        label,
        ha='left',
        va='top',
        xytext=(date, spx.asof(date) + 225),  # The xytext parameter specifies the text position.
        xy=(date, spx.asof(date) + 75),  # The xy parameter specifies the arrow's destination.
        arrowprops=dict(facecolor='blue', headwidth=10, headlength=4, width=2),
        # equivalent: arrowprops={'facecolor':'blue', 'headwidth':10, 'headlength':4, 'width':2}
    )

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])

ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

plt.show()

Adding arrows

The appearance of the arrow is controlled by a dictionary passed to the arrowprops parameter. 'arrowstyle': the values '<-', '<', '-', 'wedge', 'simple', and 'fancy' control the style of the arrow. 'facecolor': this is the color used for the arrow; it will be used to set the background and the edge color. 'edgecolor': this is the color used for the edges of the arrow's shape. 'alpha': this is used to set the transparency level so that the arrow blends with the background.

The shrink parameter controls the gap between the arrow's endpoints and the arrow itself.