import matplotlib.pyplot as plt
# Slice sizes and labels for the education-level pie chart; the five
# fractions add up to 1.
edu = [0.2515, 0.3724, 0.3336, 0.0368, 0.0057]
labels = ['中专', '大专', '本科', '硕士', '其他']

# Select a CJK-capable font *before* the first figure is drawn; with the
# default font the Chinese labels and title come out as missing-glyph boxes.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# Minimal pie chart: slice sizes, slice labels and one-decimal percentages.
plt.pie(x=edu, labels=labels, autopct='%.1f%%')
plt.title('失信用户的教育水平分布')
plt.show()
# Styled variant of the education pie chart.
explode = [0, 0.1, 0, 0, 0]  # only the second slice is offset from the centre
colors = ['#9999ff', '#ff9999', '#7777aa', '#2442aa', '#dd5555']

# Font used for the Chinese text, and plain ASCII minus signs on the axes.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft YaHei'],
    'axes.unicode_minus': False,
})

# Equal aspect ratio so the pie is drawn as a circle rather than an ellipse.
plt.axes(aspect='equal')

plt.pie(
    x=edu,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%.1f%%',        # percentage text with one decimal place
    pctdistance=0.8,         # percentage text position, relative to the radius
    labeldistance=1.1,       # label position, relative to the radius
    radius=1.2,
    startangle=180,
    counterclock=False,      # lay the slices out clockwise
    wedgeprops=dict(linewidth=1.5, edgecolor='green'),
    textprops=dict(fontsize=10, color='black'),
)
plt.title('失信用户的受教育水平分布')
plt.show()
import pandas as pd
# Same education shares, held in a pandas Series so that pandas can do the
# plotting. The empty name keeps the Series name from being used as a y-label.
data1 = pd.Series(
    [0.2515, 0.3724, 0.3336, 0.0368, 0.0057],
    index=['中专', '大专', '本科', '硕士', '其他'],
    name='',
)

# Equal aspect ratio so the pie is circular.
plt.axes(aspect='equal')

data1.plot(
    kind='pie',
    title='失信用户的受教育水平分布',
    autopct='%.1f%%',
    startangle=180,
    counterclock=False,
    radius=1,
    textprops=dict(fontsize=10, color='black'),
    wedgeprops=dict(linewidth=1.5, edgecolor='green'),
)
plt.show()
# Vertical bar chart of GDP per province.
# NOTE(review): assumes the workbook has 'Province' and 'GDP' columns, as the
# attribute accesses below require.
GDP = pd.read_excel(r'C:\Users\Administrator\Desktop\Province GDP 2017.xlsx')
plt.style.use('ggplot')

# Bar positions and heights are passed positionally: the old `left=` keyword
# is rejected by current matplotlib, while the positional form works with
# both the old (`left`) and the new (`x`) signature.
plt.bar(
    range(GDP.shape[0]),
    GDP.GDP,
    tick_label=GDP.Province,
    color='steelblue',
)
plt.ylabel('GDP(万亿)')
plt.title('2017年度6个省份GDP分布')

# Write each value, rounded to one decimal, slightly above its bar.
for x, y in enumerate(GDP.GDP):
    plt.text(x, y + 0.1, '%s' % round(y, 1), ha='center')
plt.show()
# Horizontal bar chart of GDP per province, sorted ascending by GDP.
# The sort is in place, so every later use of GDP sees the sorted order.
GDP.sort_values(by='GDP', inplace=True)

# Bar positions and widths are passed positionally: in current matplotlib
# `bottom=` no longer names the bar position (it is now `y`), while the
# positional form works with both the old and the new signature.
plt.barh(
    range(GDP.shape[0]),
    GDP.GDP,
    tick_label=GDP.Province,
    color='steelblue',
)
plt.xlabel('GDP(万亿)')
plt.title('2017年度6个省份GDP分布')

# Write each value, rounded to one decimal, just right of its bar.
for y, x in enumerate(GDP.GDP):
    plt.text(x + 0.1, y, '%s' % round(x, 1), va='center')
plt.show()
# Stacked bar chart: one bar per quarter, split into the three industries.
# NOTE(review): the value column is read as `GPD` (sic) - confirm that this
# matches the workbook's column name and is not a typo for `GDP`.
Industry_GDP = pd.read_excel(r'C:\Users\Administrator\Desktop\Industry_GDP.xlsx')
Quarters = Industry_GDP.Quarter.unique()

# Re-index every industry series to 0..n-1 so that adding them together
# (for the stacking offsets) aligns row by row instead of by original label.
Industry1 = Industry_GDP.GPD[Industry_GDP.Industry_Type == '第一产业']
Industry1.index = range(len(Quarters))
Industry2 = Industry_GDP.GPD[Industry_GDP.Industry_Type == '第二产业']
Industry2.index = range(len(Quarters))
Industry3 = Industry_GDP.GPD[Industry_GDP.Industry_Type == '第三产业']
Industry3.index = range(len(Quarters))  # kept consistent with the two above

# Bar positions are passed positionally; the old `left=` keyword is rejected
# by current matplotlib. Each layer starts where the previous ones end.
plt.bar(range(len(Quarters)), Industry1,
        color='steelblue', label='第一产业', tick_label=Quarters)
plt.bar(range(len(Quarters)), Industry2, bottom=Industry1,
        color='green', label='第二产业')
plt.bar(range(len(Quarters)), Industry3, bottom=Industry1 + Industry2,
        color='red', label='第三产业')
plt.ylabel('生成总值(亿)')
plt.title('2017年各季度三产业总值')
plt.legend()
plt.show()
import numpy as np
# Grouped bar chart comparing the 2016 and 2017 counts for each city.
# NOTE(review): assumes the rows of each year appear in the same city order
# as `Cities`; nothing here enforces that - confirm against the data.
HuRun = pd.read_excel(r'C:\Users\Administrator\Desktop\HuRun.xlsx')
Cities = HuRun.City.unique()
Counts2016 = HuRun.Counts[HuRun.Year == 2016]
Counts2017 = HuRun.Counts[HuRun.Year == 2017]
bar_width = 0.4

# Bar positions are passed positionally; the old `left=` keyword is rejected
# by current matplotlib. The 2017 bars are shifted right by one bar width.
plt.bar(np.arange(len(Cities)), Counts2016,
        label='2016', color='steelblue', width=bar_width)
plt.bar(np.arange(len(Cities)) + bar_width, Counts2017,
        label='2017', color='indianred', width=bar_width)

# Tick positions are derived from the data instead of hard-coding 5 cities
# and an offset of 0.2: each tick sits halfway between the paired bars.
plt.xticks(np.arange(len(Cities)) + bar_width / 2, Cities)
plt.ylabel('亿万资产家庭数')
plt.title('近两年5个城市亿万资产家庭数比较')
plt.legend()
plt.show()
# Same provincial GDP bar chart, drawn through the pandas plotting API.
GDP.GDP.plot(kind='bar', width=0.8, rot=0, color='steelblue',
             title='2017年度6个省份GDP分布')
plt.ylabel('GDP(万亿)')

# Replace the default tick labels (the Series index) with province names.
plt.xticks(range(len(GDP.Province)), GDP.Province)

# Write each value, rounded to one decimal, above its bar. The body must be
# indented under the `for`, otherwise the file does not parse.
for x, y in enumerate(GDP.GDP):
    plt.text(x - 0.1, y + 0.2, '%s' % round(y, 1), va='center')
plt.show()
# Reshape HuRun from long to wide (one row per city, one column per year),
# order the cities by their 2016 value, largest first, and let pandas draw
# the grouped bars.
HuRun_reshape = (
    HuRun
    .pivot_table(index='City', columns='Year', values='Counts')
    .reset_index()
    .sort_values(by=2016, ascending=False)
)

HuRun_reshape.plot(
    kind='bar',
    x='City',
    y=[2016, 2017],
    color=['steelblue', 'indianred'],
    width=0.8,
    rot=0,  # keep the city names horizontal
    title='近两年5个城市亿万资产家庭数比较',
)
plt.ylabel('亿万资产家庭数')
plt.xlabel('')
plt.show()
import seaborn as sns
# Horizontal provincial GDP bar chart drawn with seaborn.
sns.barplot(
    y='Province',
    x='GDP',
    data=GDP,
    color='steelblue',
    orient='horizontal',
)
plt.xlabel('GDP(万亿)')
plt.ylabel('')
plt.title('2017年度6个省份GDP分布')

# Write each value, rounded to one decimal, at the end of its bar. The body
# must be indented under the `for`, otherwise the file does not parse.
for y, x in enumerate(GDP.GDP):
    plt.text(x, y, '%s' % round(x, 1), va='center')
plt.show()
# Grouped seaborn bar plot: Age on the y axis per Pclass, split by Sex.
Titanic = pd.read_csv(r'C:\Users\Administrator\Desktop\titanic_train.csv')

sns.barplot(
    data=Titanic,
    x='Pclass',
    y='Age',
    hue='Sex',
    palette='RdBu',
    saturation=1,
    # Error-bar appearance.
    # NOTE(review): `errcolor` / `errwidth` are the older seaborn spelling;
    # confirm they are still accepted by the installed version.
    errcolor='blue',
    errwidth=2,
    capsize=0.05,
)
plt.title('各船舱等级中男女乘客的年龄差异')
plt.show()
# Check whether Age has missing values. The result is not stored or printed,
# so this line only shows something in an interactive session.
Titanic.Age.isnull().any()

# Drop, in place, the rows whose Age is missing before plotting.
Titanic.dropna(subset=['Age'], inplace=True)

# Histogram of passenger age with 20 bins.
plt.hist(
    x=Titanic.Age,
    bins=20,
    edgecolor='black',
    color='steelblue',
)
plt.xlabel('年龄')
plt.ylabel('频数')
plt.title('乘客年龄分布')
plt.show()
# Histogram of passenger age with a kernel-density curve drawn on top.
# `density=True` normalises the histogram so that it shares the density
# scale of the KDE curve; it replaces the `normed` keyword, which current
# matplotlib no longer accepts.
Titanic.Age.plot(kind='hist', bins=20, color='steelblue', edgecolor='black',
                 density=True, label='直方图')
Titanic.Age.plot(kind='kde', color='red', label='核密度图')
plt.xlabel('年龄')
plt.ylabel('核密度值')
plt.title('乘客年龄分布')
plt.legend()
plt.show()
# Split the ages by sex and overlay the two histograms (no KDE curve).
# NOTE(review): `sns.distplot` is deprecated in recent seaborn releases;
# confirm it is available in the installed version.
Age_Male = Titanic.Age[Titanic.Sex == 'male']
Age_Female = Titanic.Age[Titanic.Sex == 'female']

sns.distplot(Age_Male, label='男性', bins=20, kde=False,
             hist_kws=dict(color='steelblue'))
sns.distplot(Age_Female, label='女性', bins=20, kde=False,
             hist_kws=dict(color='purple'))
plt.title('男女乘客的年龄直方图')
plt.legend()
plt.show()
# Kernel-density curves only (histograms switched off) for male and female
# ages: a solid red line for men, a dashed black line for women.
sns.distplot(
    Age_Male,
    hist=False,
    norm_hist=True,
    kde_kws=dict(color='red', linestyle='-'),
    label='男性',
)
sns.distplot(
    Age_Female,
    hist=False,
    norm_hist=True,
    kde_kws=dict(color='black', linestyle='--'),
    label='女性',
)
plt.title('男女乘客的年龄核密度图')
plt.legend()
plt.show()
# Single box plot of the `price_unit` column of the housing data.
Sec_Buildings = pd.read_excel(r'C:\Users\Administrator\Desktop\sec_buildings.xlsx')

plt.boxplot(
    x=Sec_Buildings.price_unit,
    labels=[''],          # blank tick label for the single box
    patch_artist=True,    # required for the box to take a face colour
    showmeans=True,
    # Appearance of the box, the outlier points, the mean and the median.
    boxprops=dict(color='black', facecolor='steelblue'),
    flierprops=dict(marker='o', markerfacecolor='red', markersize=3),
    meanprops=dict(marker='D', markerfacecolor='indianred', markersize=4),
    medianprops=dict(linestyle='--', color='orange'),
)
plt.title('二手房单价分布的箱线图')
plt.show()
# One box per region, ordered by mean unit price, highest first.
group_region = Sec_Buildings.groupby('region')
# The string 'mean' selects the same aggregation as passing `np.mean`, without
# the warning newer pandas emits for NumPy callables.
avg_price = group_region.aggregate({'price_unit': 'mean'}).sort_values(
    'price_unit', ascending=False)

# Collect the unit prices of each region in the same order as `avg_price`.
# (A comprehension replaces the original loop, whose body was not indented
# and therefore did not parse.)
region_price = [
    Sec_Buildings.price_unit[Sec_Buildings.region == region]
    for region in avg_price.index
]

plt.boxplot(
    x=region_price,
    patch_artist=True,    # required for the boxes to take a face colour
    labels=avg_price.index,
    showmeans=True,
    boxprops={'color': 'black', 'facecolor': 'steelblue'},
    flierprops={'marker': 'o', 'markerfacecolor': 'red', 'markersize': 3},
    meanprops={'marker': 'D', 'markerfacecolor': 'indianred', 'markersize': 4},
    medianprops={'linestyle': '--', 'color': 'orange'},
)
plt.ylabel('单价(元)')
plt.title('不同行政区域的二手房单价对比')
plt.show()
# Per-region box plot drawn with seaborn, with the regions in the order of
# `avg_price.index`.
sns.boxplot(
    data=Sec_Buildings,
    x='region',
    y='price_unit',
    order=avg_price.index,
    color='steelblue',
    showmeans=True,
    # Appearance of the outlier points, the mean marker and the median line.
    flierprops=dict(marker='o', markerfacecolor='red', markersize=3),
    meanprops=dict(marker='D', markerfacecolor='indianred', markersize=4),
    medianprops=dict(linestyle='--', color='orange'),
)
plt.xlabel('')
plt.ylabel('单价(元)')
plt.title('不同行政区域的二手房单价对比')
plt.show()
# Split violin plot of total_bill per day: the two `sex` groups share each
# violin, one half each.
tips = pd.read_csv(r'C:\Users\Administrator\Desktop\tips.csv')

sns.violinplot(
    data=tips,
    x="total_bill",
    y="day",
    hue="sex",
    order=['Thur', 'Fri', 'Sat', 'Sun'],  # explicit order of the day axis
    split=True,
    # NOTE(review): `scale` is the older seaborn spelling of this option;
    # confirm it is still accepted by the installed version.
    scale='count',
    palette='RdBu',
)
plt.title('每天不同性别客户的消费额情况')
plt.legend(loc='upper center', ncol=2)
plt.show()
# Line chart of wechat.Counts against wechat.Date, with a marker per point.
wechat = pd.read_excel(r'C:\Users\Administrator\Desktop\wechat.xlsx')

plt.plot(
    wechat.Date,
    wechat.Counts,
    # line appearance
    color='steelblue',
    linestyle='-',
    linewidth=2,
    # marker appearance
    marker='o',
    markersize=6,
    markerfacecolor='brown',
    markeredgecolor='black',
)
plt.ylabel('人数')
plt.title('每天微信文章阅读人数趋势')
plt.show()
import matplotlib as mpl
# Two series on one axis: Counts as a solid line, Times as a dashed line.
plt.plot(wechat.Date, wechat.Counts,
         color='steelblue', linestyle='-', label='阅读人数')
plt.plot(wechat.Date, wechat.Times,
         color='indianred', linestyle='--', label='阅读人次')

# Customise the x axis of the current axes.
ax = plt.gca()

# Show tick labels as month-day.
# NOTE(review): presumes wechat.Date holds datetime values - confirm.
date_format = mpl.dates.DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_format)

# Major ticks at every multiple of 7 in axis units.
xlocator = mpl.ticker.MultipleLocator(7)
ax.xaxis.set_major_locator(xlocator)

# Rotate the tick labels by 45 degrees.
plt.xticks(rotation=45)

plt.ylabel('人数')
plt.title('每天微信文章阅读人数与人次趋势')
plt.legend()
plt.show()
# One line per year of the `high` column against month.
weather = pd.read_excel(r'C:\Users\Administrator\Desktop\weather.xlsx')

# Wide table: rows are months, columns are years. pivot_table aggregates
# with its default function (the mean).
data = weather.pivot_table(values='high', index='month', columns='year')

# One line style per column, applied in column order.
data.plot(kind='line', style=['-', '--', ':'])
plt.xlabel('月份')
plt.ylabel('气温')
plt.title('每月平均最高气温波动趋势')
plt.show()
# Scatter plot of Petal_Length (y) against Petal_Width (x) with matplotlib.
iris = pd.read_csv(r'C:\Users\Administrator\Desktop\iris.csv')

plt.scatter(
    x=iris.Petal_Width,
    y=iris.Petal_Length,
    color='steelblue',
)
plt.xlabel('花瓣宽度')
plt.ylabel('花瓣长度')
plt.title('鸢尾花的花瓣宽度与长度关系')
plt.show()
# The same petal scatter plot, drawn through the pandas plotting API.
iris.plot(
    kind='scatter',
    x='Petal_Width',
    y='Petal_Length',
    title='鸢尾花的花瓣宽度与长度关系',
)
# Replace the column names pandas puts on the axes with Chinese labels.
plt.xlabel('花瓣宽度')
plt.ylabel('花瓣长度')
plt.show()
# Petal scatter plot with a fitted regression line per Species (seaborn).
sns.lmplot(
    data=iris,
    x='Petal_Width',
    y='Petal_Length',
    hue='Species',
    truncate=True,      # limit each fitted line to its own data range
    legend_out=False,   # draw the legend inside the axes
)
plt.xlabel('花瓣宽度')
plt.ylabel('花瓣长度')
plt.title('鸢尾花的花瓣宽度与长度关系')
plt.show()
# Bubble chart: Sales on x, Profit on y, bubble area driven by Profit_Ratio.
Prod_Category = pd.read_excel(r'C:\Users\Administrator\Desktop\SuperMarket.xlsx')

# Min-max scale Profit_Ratio into [0, 1]; the 0.001 offset keeps the row
# with the smallest ratio from getting a zero-sized bubble.
range_diff = Prod_Category.Profit_Ratio.max() - Prod_Category.Profit_Ratio.min()
Prod_Category['std_ratio'] = (
    (Prod_Category.Profit_Ratio - Prod_Category.Profit_Ratio.min()) / range_diff
    + 0.001
)

# One scatter layer per product category, each with its own colour.
for category, bubble_color in (
    ('办公用品', 'steelblue'),
    ('技术产品', 'indianred'),
    ('家具产品', 'black'),
):
    in_category = Prod_Category.Category == category
    plt.scatter(
        x=Prod_Category.Sales[in_category],
        y=Prod_Category.Profit[in_category],
        s=Prod_Category.std_ratio[in_category] * 1000,
        color=bubble_color,
        label=category,
        alpha=0.6,
    )

plt.xlabel('销售额')
plt.ylabel('利润')
plt.title('销售额、利润及利润率的气泡图')
plt.legend()
plt.show()
# Heat map of summed Sales by month (rows) and year (columns).
Sales = pd.read_excel(r'C:\Users\Administrator\Desktop\Sales.xlsx')

# Derive the calendar year and month from the Date column.
Sales['year'] = Sales.Date.dt.year
Sales['month'] = Sales.Date.dt.month

# Sum Sales within each (month, year) cell.
Summary = Sales.pivot_table(
    values='Sales',
    index='month',
    columns='year',
    aggfunc=np.sum,
)

sns.heatmap(
    data=Summary,
    cmap='PuBuGn',
    linewidths=.1,   # thin separator lines between the cells
    annot=True,      # print the value inside each cell ...
    fmt='.1e',       # ... in scientific notation with one decimal
)
plt.title('每年各月份销售总额热力图')
plt.show()
# Four charts laid out on a 2x3 grid in a single figure.
Prod_Trade = pd.read_excel(r'C:\Users\Administrator\Desktop\Prod_Trade.xlsx')
Prod_Trade['year'] = Prod_Trade.Date.dt.year
Prod_Trade['month'] = Prod_Trade.Date.dt.month

plt.figure(figsize=(12, 6))

# Top-left cell: pie chart of the 2012 order classes.
ax1 = plt.subplot2grid(shape=(2, 3), loc=(0, 0))
Class_Counts = Prod_Trade.Order_Class[Prod_Trade.year == 2012].value_counts()
Class_Percent = Class_Counts / Class_Counts.sum()
ax1.set_aspect(aspect='equal')  # keep the pie circular
ax1.pie(x=Class_Percent.values, labels=Class_Percent.index, autopct='%.1f%%')
ax1.set_title('各等级订单比例')

# Top-middle cell: 2012 Sales summed per month, as a line.
ax2 = plt.subplot2grid(shape=(2, 3), loc=(0, 1))
Month_Sales = (
    Prod_Trade[Prod_Trade.year == 2012]
    .groupby(by='month')
    .aggregate({'Sales': np.sum})
)
Month_Sales.plot(title='2012年各月销售趋势', ax=ax2, legend=False)
ax2.set_xlabel('')

# Right-hand column (both rows): box plot of Trans_Cost per Transport value.
ax3 = plt.subplot2grid(shape=(2, 3), loc=(0, 2), rowspan=2)
sns.boxplot(x='Transport', y='Trans_Cost', data=Prod_Trade, ax=ax3)
ax3.set_title('各运输方式成本分布')
ax3.set_xlabel('')
ax3.set_ylabel('运输成本')

# Bottom row, first two columns: distribution of the 2012 Sales values.
ax4 = plt.subplot2grid(shape=(2, 3), loc=(1, 0), colspan=2)
sns.distplot(
    Prod_Trade.Sales[Prod_Trade.year == 2012],
    bins=40,
    norm_hist=True,
    ax=ax4,
    hist_kws={'color': 'steelblue'},
    kde_kws={'linestyle': '--', 'color': 'red'},
)
ax4.set_title('2012年客单价分布图')
ax4.set_xlabel('销售额')

# Extra spacing so that titles and tick labels do not overlap.
plt.subplots_adjust(hspace=0.6, wspace=0.3)
plt.show()