Python数据可视化

thisissally

已于 2023-08-22 15:16:53 修改

阅读量326

点赞数

分类专栏：特征工程文章标签： python

于 2021-08-13 14:18:53 首次发布

本文链接：https://blog.csdn.net/weixin_45366750/article/details/119674314

版权

特征工程专栏收录该内容

7 篇文章 0 订阅

订阅专栏

用 Python 画图一共有三种方式，可以混合使用：

matplotlib.pyplot
seaborn
df.x1.plot(kind='')

一、必备语句

# 导入第三方模块
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter  # 将用户定义的函数应用于值
%matplotlib inline
import seaborn as sns 
# 设置中文和负号正常显示
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False

图像大小

fig=plt.figure(figsize=(18,6)) # 设置图形的长和宽（单位英寸），fig表示绘图窗口
fig, ax = plt.subplots(figsize=(5, 6)) # 调整图像的大小

坐标轴

# ax
ax.set_xlim([-10000, 140000])
ax.set_xlabel( 'Total Revenue' )
ax.set_ylabel( 'Customer' )
ax.set(title='2014 Revenue', xlabel='Total Revenue', ylabel='Customer')

# plt
plt.xlim(0,1000)
plt.xticks(np.linspace(0,1000,15,endpoint=True))  # 设置x轴刻度（需先 import numpy as np）
plt.xticks(np.arange(0,21,1))  # 设置x轴刻度
plt.tick_params(top=False, right=False, labelsize=20) # 去除图边框的顶部刻度和右边刻度

图形在画布上的分布

# （1）每次构造一个子图
names = ['Anime', 'Comic', 'Game']
values = [30, 10, 20]
plt.subplot(221) # 将整个图像窗口分为2行2列, 当前位置为1，也可以用逗号分隔，默认111
plt.bar(names, values) # 柱状图
plt.subplot(222) # 等同于ax2=fig.add_subplot(2,2,2)
plt.scatter(names, values) #散点图
plt.subplot(223)
plt.plot(names, values) #折线图
plt.suptitle('三种图示',fontname='SimHei')
plt.show()

# （2）先构造所有子图，再依次绘制
fig,ax=plt.subplots(2,2) # 构造2x2的子图
ax[0,1].plot(names, values) # 通过下标访问
ax[1,0].scatter(names, values)
ax[1,1].bar(names, values)
plt.show()

标注

plt.xlabel('M',color="r",fontsize=20) # x,y轴文字，颜色，字体大小
plt.ylabel("Consume Level")
plt.title("Figure.1")
plt.text(2.5,100,"TEXT1") # 图内文字
plt.annotate('max value', xy=(20, 400), xytext=(12.5, 400),arrowprops=dict(facecolor='black', shrink=0.05)) # 指定文字,箭头指向的坐标,文字显示的坐标,箭头的属性
plt.legend(['show picture x1']) # 图例（标签需以列表形式传入，否则字符串会被逐字符拆开）
plt.legend().set_visible(False)# 隐藏图例

把两幅图放在同一个图像上

fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(7, 4))# sharey = True以便yaxis共享相同的标签。

线的参数

plt.plot(x,y,'r')   # 修改颜色,rgb=红绿蓝,默认为蓝
plt.plot(x,y,'--')  # 修改线的形状为虚线,默认为实线'-',另外'o'为点,'^'为三角
plt.plot(x,y,'g--') # 一起修改为绿色虚线

点的参数

marker = 'o', # 点的形状：s方形，h六角形，H六角形，*星形，+加号，x x型，d菱形，D菱形，p五角形
markersize = 5, # 点的大小
markerfacecolor='mediumpurple' # 点的填充色

其他参数

plt.grid(True) # 设置网格线
fig.autofmt_xdate(rotation = 45) # x轴的日期旋转45°
# 绘制一条线
plt.axvline(x=df.x1.mean(), color='blue', label='', linestyle='--') # 竖线（x 须为单个数值，这里以均值为例）
plt.axhline(y=df.x2.mean(), color='blue', label='', linestyle='--') # 横线（y 须为单个数值，这里以均值为例）

美观

#风格
plt.style.available # 查看所有绘图样式
plt.style.use('ggplot' )
plt.style.use('seaborn') # 灰色格子（matplotlib 3.6 及以上版本中该样式已改名为 'seaborn-v0_8'）
#颜色
sns.set_palette('Set1')

二、具体画图语句

1.折线图

# （1）
x=[0,1,2,3]
y=[0,1,1,3]
plt.plot(x,y,lw=2,color='r',linestyle=':',marker='o') # lw线宽，linestyle线型，marker点的形状
plt.show()
# 一张图上多个折线图：
plt.plot(x,y) # 多次
plt.plot(x1,y1,'r-',x2,y2,'g--')
#（2）
sns.lineplot(x=x, y=y)  # 新版 seaborn 要求以关键字参数传入 x、y
#（3）
sns.relplot(x="prime_genre", y="user_rating",kind='line', data=app)

2.直方图

#（1）
sns.distplot(x,color='y')  # 注：distplot 自 seaborn 0.11 起已弃用，新版本可改用 sns.histplot(x, kde=True, color='y')
#（2）
df['均价'].plot(kind='hist',cumulative = True,legend=True,edgecolor = 'k',alpha=0.4,title=' ') # cumulative = True累积分布直方图，alpha透明度

3.箱线图:

展现数据的分布（如上下四分位值、中位数等），反应数据异常情况。

#（1）一维箱线图
a-plt.boxplot(x = titanic.Age, # 指定绘图数据
patch_artist=True, # 要求用自定义颜色填充盒形图，默认白色填充
showmeans=True, # 以点的形式显示均值
boxprops = {'color':'black','facecolor':'#9999ff'}, # 设置箱体属性，填充色和边框色
flierprops = {'marker':'o','markerfacecolor':'red','color':'black'}, # 设置异常值属性，点的形状、填充色和边框色
meanprops = {'marker':'D','markerfacecolor':'indianred'}, # 设置均值点的属性，点的形状、填充色
medianprops = {'linestyle':'--','color':'orange'}) # 设置中位数线的属性，线的类型和颜色
b-sns.boxplot(y=tired['vehicle_speed'],palette='Set3')

# （2）二维箱线图
1-按顺序分组：titanic.sort_values(by = 'Pclass', inplace=True)
2- 通过for循环将不同仓位的年龄人群分别存储到列表Age变量中
Age = []
Levels = titanic.Pclass.unique()
for Pclass in Levels:
    Age.append(titanic.loc[titanic.Pclass==Pclass,'Age'])
3-plt.boxplot(,labels = ['一等舱','二等舱','三等舱'],)
sns.boxplot(x=x, y=y)

# （3）三维箱线图
sns.boxplot(x=x, y=y, hue=hue)

4.散点图加趋势线（可分组）

#（1）
sns.scatterplot(x=x, y=y, data=data)
#（2）
ax = df.plot(kind="scatter", y='like_num', x='read_num',s=10, figsize=(9,6), fontsize=15)
ax.set_xlabel("阅读量")
ax.set_ylabel("点赞数")
#（3）
plt.scatter()
plt.plot() # 先散点再折线图从而构造趋势线，等同于sns.regplot(x='local_tv',y='revenue',data=store)线性拟合

5.柱状图

a.竖直柱状图

（1）sns.barplot(x='time',y='revenue',hue='channel',data=uniqlo, estimator=sum) # 三个维度
（2）plt.bar(name,values)
（3）计数
#
df['地段'].value_counts().plot(kind='bar', legend=True,title='上海徐汇区二手房在售数量分布区域') 
# 
sns.countplot(y='type',hue='paid',data=app)

b.水平柱状图

（1）df.plot(kind='barh', y="Sales", x="Name")
（2）plt.barh(name,values)
（3）df['地段'].value_counts().plot(kind='barh', legend=True,title='上海徐汇区二手房在售数量分布区域')

c.两组数据比较的垂直柱状图，只需交换index和columns即可

data=pd.DataFrame([x,y],index=['X','Y'],columns=list('abcdefgh'))
data.plot.bar()
plt.show()

d.两组数据交叉比较的垂直柱状图

data.transpose().plot.bar() # data.transpose()转置
plt.show()

三、描述性统计

1.排序：order要转化成index形式

（1）order=app['prime_genre'].value_counts().index
（2）order=[1,2,3,4]
（3）降序画出每类产品的平均销售额（默认均值）sns.barplot(x='product',y='revenue',data=df1,order=df1.groupby(df1['product']).revenue.mean().sort_values(ascending=False).index)
（4）降序画出每类产品的市场份额，即销售总额sns.barplot(x='product',y='revenue',data=df1,estimator=sum,order=df1.groupby(df1['product']).revenue.sum().sort_values(ascending=False).index)
2.分组统计个数：
df.groupby(['channel'])['customer'].count()

四、相关分析

（一）相关系数矩阵

1.所有变量，任意两个变量相关分析

store.corr()

2.选定个数的变量之间

q1=['x1' ,'x2','x3']
df1[q1].corr( )

3.一对多，其他变量与revenue的相关分析

red.corr()[['revenue']].sort_values('revenue',ascending=False )  # 注意双括号

（二）热力图

sns.heatmap(df1[q1].corr(), cmap='Blues')

（三）pairplot

sns.pairplot(data=red,vars=['revenue','age','previous_order_amount'],height=7,aspect=0.8,kind = 'reg')

五、词云图：

把所有文章的标题用结巴库分词处理加入到 words 列表中，传递给 WordCloud 组件，另外还需要指定一个中文字体，因为 wordcloud 默认无法处理中文。max_words 用于指定最多显示多少词语。

conda install jieba
conda install wordcloud
from wordcloud import WordCloud
import jieba
words = []
for i in  df.title:
    seg_list = jieba.cut(i, cut_all=False)
    words.append(" ".join(seg_list))
wordcloud = WordCloud(font_path='/Library/Fonts/Songti.ttc',
background_color="white",
max_words=80,).generate(" ".join(words))
plt.figure(figsize=(9,6))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()

thisissally

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python数据可视化

用python画图一共有三个逻辑：可以合用matplotlib.pyplotseaborndf.x1.plot(kind=’’)一、必备语句# 导入第三方模块import pandas as pdimport matplotlib.pyplot as plt # 参数定制化from matplotlib.ticker import FuncFormatter # 将用户定义的函数应用于值%matplotlib inlineimport seaborn as sns # 参数能调用的
复制链接

扫一扫

专栏目录