matplotlib 函数:
饼状图
fig, ax = plt.subplots()
ax.pie()
def pieChart(sdss_df):
    '''Draw a pie chart of how many objects fall into each class.

    Args:
        sdss_df: DataFrame with a 'class' column holding each object's label.

    Returns:
        None. The figure is displayed immediately with plt.show().
    '''
    # value_counts() returns one count per distinct value, ordered by
    # descending frequency. The slice order therefore depends on the data,
    # so labels and colors are looked up per class instead of by position.
    # NOTE(review): assumes the raw labels are 'GALAXY', 'STAR' and 'QSO' -
    # confirm against the CSV. Unknown labels fall back to their raw name
    # and a neutral color.
    display_names = {'GALAXY': 'Galaxy', 'STAR': 'Stars', 'QSO': 'Quasars'}
    slice_colors = {'GALAXY': 'skyblue', 'STAR': 'red', 'QSO': 'gold'}

    label_counts = sdss_df['class'].value_counts()
    labels = [display_names.get(name, str(name)) for name in label_counts.index]
    colors = [slice_colors.get(name, 'lightgray') for name in label_counts.index]

    fig1, ax1 = plt.subplots()
    ax1.pie(label_counts, labels=labels, autopct='%1.2f%%', startangle=45, colors=colors)
    ax1.axis('equal')  # same scale on both axes so the pie is drawn as a circle
    plt.title('SDSS Object Classes')
    plt.show()
分布直方图
seaborn.distplot()
https://zhuanlan.zhihu.com/p/161513797
kdeplot(核密度估计图) https://blog.csdn.net/qq_39949963/article/details/79362501
def distribution(sdss_df, axes, feature, row):
    '''Plot one histogram of `feature` per object class along row `row` of `axes`.'''
    labels = np.unique(sdss_df['class'])  # distinct class labels, sorted
    colors = ['skyblue', 'gold', 'red']
    for i, label in enumerate(labels):
        # .loc with a boolean row mask and a column name picks this class's
        # values of the requested feature
        values = sdss_df.loc[sdss_df['class'] == label, feature]
        # plain histogram (no KDE curve) drawn on the i-th axes of this row
        ax = sns.distplot(values, kde=False, bins=30, ax=axes[row, i], color=colors[i])
        ax.set_title(label)
        if i == 0:
            # only the left-most panel carries the y-axis label
            ax.set(ylabel='Count')
回归图
seaborn.distplot()
https://zhuanlan.zhihu.com/p/161513797
kdeplot(核密度估计图) https://blog.csdn.net/qq_39949963/article/details/79362501
def distribution(sdss_df, axes, feature, row):
    '''Histogram `feature` separately for every class, one panel per class in `axes[row]`.'''
    palette = ['skyblue', 'gold', 'red']
    class_names = np.unique(sdss_df['class'])  # sorted unique class labels
    for col, name in enumerate(class_names):
        # rows belonging to the current class
        in_class = sdss_df['class'] == name
        panel = sns.distplot(
            sdss_df.loc[in_class, feature],
            kde=False,
            bins=30,
            ax=axes[row, col],
            color=palette[col],
        )
        panel.set_title(name)
        if col == 0:
            # label the count axis once, on the first panel of the row
            panel.set(ylabel='Count')
def main():
    '''Load the SDSS export and display each exploratory plot in turn.

    Reads the CSV from the working directory, then shows the class pie
    chart, the equatorial-coordinate plots, and one histogram grid per
    feature group. Returns None.
    '''
    # read in SDSS data
    filepath = 'Skyserver_12_30_2019 4_49_58 PM.csv'
    sdss_df = pd.read_csv(filepath, encoding='utf-8')

    # feature groups to histogram; 'ra' and 'dec' (the positional features)
    # are not referenced directly in this function
    nonugriv = ['redshift', 'plate', 'mjd', 'fiberid']
    ugriv = ['u', 'g', 'r', 'i', 'z']

    # pie chart of label counts
    pieChart(sdss_df)

    # equatorial coordinates of the observations
    # NOTE(review): `equitorial` is defined outside this view. The original
    # indentation was lost, so plt.show() is placed after the loop to match
    # the two histogram sections below - confirm.
    for row in range(3):
        equitorial(sdss_df, row)
    plt.show()

    # One figure per feature group. The grid gets exactly one row per
    # feature, so the row count can never drift from the feature list.
    # ncols stays 3 (one column per class) because distribution() only
    # defines three colors.
    for features, figsize in ((nonugriv, (12, 14)), (ugriv, (12, 15))):
        fig, axes = plt.subplots(nrows=len(features), ncols=3, figsize=figsize)
        plt.subplots_adjust(wspace=.4, hspace=.4)
        for row, feat in enumerate(features):
            distribution(sdss_df, axes, feat, row)
        plt.show()
# Run the analysis only when this file is executed as a script, so that
# importing it does not read the CSV or open plot windows.
if __name__ == '__main__':
    main()