# 有这5小段代码在手，轻松实现数据可视化（Python+Matplotlib）

Matplotlib是个很流行的Python库，可以轻松实现数据可视化。但是，每次执行新项目的绘图时，设置数据、参数、图形的过程都非常的繁琐。 在本文中，我们将着眼于5种数据可视化方法，用Python的Matplotlib库实现一些快速而简单的功能。

import matplotlib.pyplot as plt
import numpy as np

def scatterplot(x_data, y_data, x_label="", y_label="", title="", color = "r", yscale_log=False):

# Create the plot object
_, ax
= plt.subplots()

# Plot the data, set the size (s), color and transparency (alpha)
# of the points
ax.scatter(x_data, y_data, s = 10, color = color, alpha = 0.75)

if yscale_log
== True:
ax.set_yscale('log')

# Label the axes and provide a title
ax.set_title(title)
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)

def lineplot(x_data, y_data, x_label="", y_label="", title=""):
# Create the plot object
_, ax
= plt.subplots()

# Plot the best fit line, set the linewidth (lw), color and
# transparency (alpha) of the line
ax.plot(x_data, y_data, lw = 2, color = '#539caf', alpha = 1)

# Label the axes and provide a title
ax.set_title(title)
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)

正态分布的IQ

def histogram(data, n_bins, cumulative=False, x_label = "", y_label = "", title = ""):
_, ax
= plt.subplots()
ax.hist(data, n_bins = n_bins, cumulative = cumulative, color = '#539caf')
ax.set_ylabel(y_label)
ax.set_xlabel(x_label)
ax.set_title(title)

• 设置水平范围，以适应两种可变分布；

• 根据这个范围和期望的分组数量，计算并设置组距；

• 设置其中一个变量具有更高透明度，以便在一张图上显示两个分布。

# Overlay 2 histograms to compare them
def overlaid_histogram(data1, data2, n_bins = 0, data1_name="", data1_color="#539caf", data2_name="", data2_color="#7663b0", x_label="", y_label="", title=""):
# Set the bounds for the bins so that the two distributions are fairly compared
max_nbins
= 10
data_range = [min(min(data1), min(data2)), max(max(data1), max(data2))]
binwidth = (data_range[1] - data_range[0]) / max_nbins

if n_bins == 0
bins = np.arange(data_range[0], data_range[1] + binwidth, binwidth)
else:
bins = n_bins

# Create the plot
_, ax = plt.subplots()
ax.hist(data1, bins = bins, color = data1_color, alpha = 1, label = data1_name)
ax.hist(data2, bins = bins, color = data2_color, alpha = 0.75, label = data2_name)
ax.set_ylabel(y_label)
ax.set_xlabel(x_label)
ax.set_title(title)
ax.legend(loc = 'best')

def barplot(x_data, y_data, error_data, x_label="", y_label="", title=""):
_, ax
= plt.subplots()
# Draw bars, position them in the center of the tick mark on the x-axis
ax.bar(x_data, y_data, color = '#539caf', align = 'center')
# Draw error bars to show standard deviation, set ls to 'none'
# to remove line between points
ax.errorbar(x_data, y_data, yerr = error_data, color = '#297083', ls = 'none', lw = 2, capthick = 2)
ax.set_ylabel(y_label)
ax.set_xlabel(x_label)
ax.set_title(title)

def stackedbarplot(x_data, y_data_list, colors, y_data_names="", x_label="", y_label="", title=""):
_, ax
= plt.subplots()
# Draw bars, one category at a time
for i in range(0, len(y_data_list)):
if i
== 0:
ax.bar(x_data, y_data_list[i], color = colors[i], align = 'center', label = y_data_names[i])
else:
# For each category after the first, the bottom of the
# bar will be the top of the last category
ax.bar(x_data, y_data_list[i], color = colors[i], bottom = y_data_list[i - 1], align = 'center', label = y_data_names[i])
ax.set_ylabel(y_label)
ax.set_xlabel(x_label)
ax.set_title(title)
ax.legend(loc = 'upper right')

def groupedbarplot(x_data, y_data_list, colors, y_data_names="", x_label="", y_label="", title=""):
_, ax
= plt.subplots()
# Total width for all bars at one x location
total_width = 0.8
# Width of each individual bar
ind_width = total_width / len(y_data_list)
# This centers each cluster of bars about the x tick mark
alteration = np.arange(-(total_width/2), total_width/2, ind_width)

# Draw bars, one category at a time
for i in range(0, len(y_data_list)):
# Move the bar to the right on the x-axis so it doesn't
# overlap with previously drawn ones
ax.bar(x_data + alteration[i], y_data_list[i], color = colors[i], label = y_data_names[i], width = ind_width)
ax.set_ylabel(y_label)
ax.set_xlabel(x_label)
ax.set_title(title)
ax.legend(loc = 'upper right')

def boxplot(x_data, y_data, base_color="#539caf", median_color="#297083", x_label="", y_label="", title=""):
_, ax
= plt.subplots()

# Draw boxplots, specifying desired style
ax.boxplot(y_data
# patch_artist must be True to control box fill
, patch_artist = True
# Properties of median line
, medianprops = {'color': median_color}
# Properties of box
, boxprops = {'color': base_color, 'facecolor': base_color}
# Properties of whiskers
, whiskerprops = {'color': base_color}
# Properties of whisker caps
, capprops = {'color': base_color})

# By default, the tick label starts at 1 and increments by 1 for
# each box drawn. This sets the labels to the ones we want
ax.set_xticklabels(x_data)
ax.set_ylabel(y_label)
ax.set_xlabel(x_label)
ax.set_title(title)