在网上看到一段绘制分布点图(dot plot)的代码,但在我这边运行时报错了,
于是对代码进行了修正。
主要是 ax.scatter(y=np.repeat(i, df_make.shape[0]), x=df_make.cty, s=75, edgecolors='blue', c='b', alpha=0.5) 这一句,
运行时会报 ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all(),
这里对其进行了修正。
为适合自己的数据修改了x轴显示范围,设置了对数坐标
修正后的完整代码如下:
```python
import matplotlib.patches as mpatches
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Dot plot of a value distribution per category, with each category's median
# highlighted, drawn on a logarithmic x axis.
#
# Input: "./res.dat", a space-delimited text file without a header row. Only
# the first two columns are used: column 0 becomes the category label
# ('manufacturer') and column 1 the plotted value ('cty').

# Visible x range. The lower bound must stay > 0 because the axis is
# logarithmic; the guide lines use the same bounds as the axis limits.
X_MIN, X_MAX = 1, 6000
# Shared by the median dots and their legend entry so the two always match.
MEDIAN_COLOR = 'firebrick'

df_raw1 = pd.read_csv("./res.dat", header=None, delimiter=' ')
df_raw = df_raw1.iloc[:, [0, 1]].copy()
df_raw.columns = ['manufacturer', 'cty']

# Mean value per manufacturer, sorted from highest to lowest. After the index
# reset, each row's positional index doubles as that manufacturer's y position.
# 'cty' is selected before aggregating so the label column never reaches
# mean()/median(); aggregating a string column raises TypeError on pandas >= 2.0.
df = df_raw.groupby('manufacturer', as_index=False)['cty'].mean()
df.sort_values('cty', ascending=False, inplace=True)
df.reset_index(drop=True, inplace=True)

# Median value per manufacturer, indexed by manufacturer.
df_median = df_raw.groupby('manufacturer')[['cty']].median()

# Draw one faint horizontal guide line per manufacturer.
fig, ax = plt.subplots(figsize=(15, 11), dpi=80)
ax.hlines(y=df.index,
          xmin=X_MIN,
          xmax=X_MAX,
          color='#01a2d9',
          alpha=0.5,
          linewidth=.5,
          linestyles='dashdot')

# Draw the dots. Grouping once avoids re-filtering the whole frame for every
# manufacturer.
values_by_make = df_raw.groupby('manufacturer')['cty']
for i, make in enumerate(df.manufacturer):
    print(i, make)
    values = values_by_make.get_group(make)
    # All data points of this manufacturer on a single row.
    ax.scatter(x=values, y=np.repeat(i, len(values)), s=75,
               edgecolors='blue', c='b', alpha=0.5)
    # The manufacturer's median on the same row.
    ax.scatter(x=df_median.loc[make, 'cty'], y=i, s=75, c=MEDIAN_COLOR)

# Decorations
# An empty line plot serves purely as a legend handle for the median dots
# (ax.plot returns a list of Line2D objects, which legend() accepts as handles).
red_patch = ax.plot([], [],
                    marker="o",
                    ms=10,
                    ls="",
                    mec=None,
                    color=MEDIAN_COLOR,
                    label="Median")
ax.legend(handles=red_patch)

ax.set_title('Distribution of City Mileage by Make', fontdict={'size': 18})
ax.set_xlabel('Miles Per Gallon (City)')
ax.set_yticks(df.index)
# astype(str) keeps the labelling working when the category column was parsed
# as numbers rather than text.
ax.set_yticklabels(df.manufacturer.astype(str).str.title(),
                   fontdict={'horizontalalignment': 'right'})
ax.set_xscale("log")
ax.set_xlim(X_MIN, X_MAX)

# Hide the frame; the guide lines and grid are enough to read the plot.
for side in ("top", "bottom", "right", "left"):
    ax.spines[side].set_visible(False)
ax.grid(axis='both', alpha=.4, linewidth=.1)
plt.show()