python如何实现循环保存图片,以及怎么保存groupby的数据
首先将代码放上来,方便一些伸手的兄弟;如果兄弟不嫌我唠叨,可以继续往下看一下我发现的一些注意事项
# 首先通过groupby把数据集分组,然后进行保存
def get_m3044_container(
        src_path="H:\\experiment\\data_set\\container_usage\\container_usage_2.csv",
        dst_path="H:\\experiment\\data_set\\container_usage\\container_m3044_group.csv"):
    """Count the usage records of every container and save the counts as CSV.

    The input file is read as a header-less CSV whose columns are named by
    ``columns`` below. The rows are grouped by ``container_id`` and the number
    of rows in each group is written to ``dst_path`` with two columns:
    ``container_id`` and ``count``.

    Args:
        src_path: Path of the raw usage CSV. Defaults to the original
            hard-coded location.
        dst_path: Path of the CSV that receives the per-container counts.
            Defaults to the original hard-coded location.

    Returns:
        None. The result is only written to ``dst_path``.
    """
    columns = ['container_id', 'machine_id', 'time_stamp', 'cpu_util_percent',
               'mem_util_percent', 'cpi', 'mem_gps', 'mpki', 'net_in', 'net_out',
               'disk_io_percent']
    container_m3044 = pd.read_csv(src_path, names=columns)

    # size() counts the rows of each group directly, so no dummy column has to
    # be added to the frame just to have something to count.
    container_m3044_group = (
        container_m3044.groupby('container_id').size().rename('count')
    )

    # Do NOT pass index=False here: container_id is the index of the grouped
    # result, and dropping the index would drop the container ids from the file.
    container_m3044_group.to_csv(dst_path)
# 循环画图
def get_each_container_plt(data_dir="H:\\experiment\\data_set\\container_usage\\"):
    """Split the usage data per container and save CPU/memory scatter plots.

    Step 1 reads the container ids from ``container_m3044_group.csv`` and, for
    every id except ``'c_10997'``, appends that container's rows from
    ``container_usage_2.csv`` to ``container_<id>.csv`` (no header, no index).

    Step 2 reads every ``container_<id>.csv`` back, sorts it by ``time_stamp``
    and saves two scatter plots into the current working directory:
    ``<id>_cpu_time.jpg`` and ``<id>_mem_time.jpg``.

    Args:
        data_dir: Directory (including the trailing separator) that holds the
            input CSV files and receives the per-container CSV files. Defaults
            to the original hard-coded location.

    Returns:
        None. All results are written to disk.
    """
    columns = ['container_id', 'machine_id', 'time_stamp', 'cpu_util_percent',
               'mem_util_percent', 'cpi', 'mem_gps', 'mpki', 'net_in', 'net_out',
               'disk_io_percent']
    container_group = pd.read_csv(data_dir + "container_m3044_group.csv")
    container_m3044 = pd.read_csv(data_dir + "container_usage_2.csv", names=columns)
    container_id_list = container_group['container_id'].values

    for container_id in container_id_list:
        # NOTE(review): c_10997 is not exported here, yet its file is read in
        # the plotting loop below, so presumably that file is produced
        # separately beforehand -- confirm.
        if container_id == 'c_10997':
            continue
        # Exact comparison: str.contains() is a substring/regex match, so
        # 'c_1' would also select the rows of 'c_10', 'c_100', ...
        df_1 = container_m3044.loc[container_m3044['container_id'] == container_id, :]
        # NOTE: mode='a' appends, so running the function twice duplicates the
        # rows in the per-container file.
        df_1.to_csv(data_dir + "container_" + container_id + ".csv",
                    index=False, header=False, mode='a')

    for container_id in container_id_list:
        df = pd.read_csv(data_dir + "container_" + container_id + ".csv", names=columns)
        df.sort_values(by='time_stamp', inplace=True, ascending=True)
        for metric, suffix in (('cpu_util_percent', "_cpu_time.jpg"),
                               ('mem_util_percent', "_mem_time.jpg")):
            # Every plot.scatter() call opens a new figure, so the tick
            # rotation has to be applied again for each plot.
            df.plot.scatter(y=metric, x='time_stamp', s=0.01)
            plt.xticks(rotation=90)
            plt.savefig(container_id + suffix)
            # Close the figure once saved; otherwise one figure per plot stays
            # alive in memory for the whole loop.
            plt.close()
注意事项
1.注意不要加上index = False
# 首先是分组部分的注意事项:代码最后保存(to_csv)时要保留index,即不要加上如下参数
index = False  # 否则作为分组键的container_id(它是分组结果的索引)不会被保存到文件中
2.在敲第二部分的代码发现一个有趣的事情,在最开始的代码中
def get_each_container_plt():
    # First draft of the per-container export/plot routine, kept to show a
    # pitfall: container_m3044 is read WITHOUT chunksize, so it is a single
    # DataFrame, and the inner "for df in container_m3044" loop below iterates
    # over its column labels instead of over chunks of rows.
    columns = ['container_id', 'machine_id', 'time_stamp', 'cpu_util_percent',
    'mem_util_percent', 'cpi', 'mem_gps', 'mpki', 'net_in', 'net_out',
    'disk_io_percent']
    container_group = pd.read_csv("H:\\experiment\\data_set\\container_usage\\container_m3044_group.csv")
    container_m3044 = pd.read_csv("H:\\experiment\\data_set\\container_usage\\container_usage_2.csv",names=columns)
    # print(container_m3044.dtype)
    container_id_list = container_group['container_id'].values
    for container_id in container_id_list:
        if container_id == 'c_10997':
            continue
        # This is how the code was written at first. The interesting part:
        # printing df shows a string (the first one is 'container_id'), because
        # iterating over a DataFrame yields its column labels. A str has no
        # .loc, so the next line cannot work as intended.
        for df in container_m3044:
            df_1 = df.loc[df['container_id'].str.contains(container_id,na=False),:]
            df_1.to_csv("H:\\experiment\\data_set\\container_usage\\container_"+container_id+".csv", index=False,header=False,mode='a')
        pass
    for container_id in container_id_list:
        df = pd.read_csv("H:\\experiment\\data_set\\container_usage\\container_"+container_id+".csv",names=columns)
        df.sort_values(by='time_stamp', inplace=True, ascending=True)
        df.plot.scatter(y='cpu_util_percent',x='time_stamp',s=0.01)
        plt.xticks(rotation=90)
        plt.savefig(container_id + "_cpu_time.jpg")
        df.plot.scatter(y='mem_util_percent', x='time_stamp', s=0.01)
        # Apply the x tick rotation again for the second plot
        plt.xticks(rotation=90)
        plt.savefig(container_id + "_mem_time.jpg")
        pass
    pass
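但如下代码中的df却不一样,这是需要注意的地方
# 原因就在chunksize:指定chunksize后,read_csv返回的是一个按块迭代的读取器,每次循环得到的是一个DataFrame;不指定chunksize时返回的是单个DataFrame,直接对它做for循环遍历到的是列名(字符串)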
def __cpu_time__(container_id='c_10997',
                 src_path="H:\\experiment\\data_set\\container_usage\\container_usage_1.csv",
                 dst_dir="D:\\experiment\\data_set\\container_usage\\",
                 chunksize=100000):
    """Extract the rows of one container from a large usage CSV, chunk by chunk.

    The source file is read in chunks of ``chunksize`` rows; in that mode
    ``pd.read_csv`` returns an iterator, so ``df`` in the loop below is a
    DataFrame holding one chunk. The rows whose ``container_id`` equals
    ``container_id`` are appended (no header, no index) to
    ``<dst_dir>container_<container_id>.csv``, and the running chunk number
    is printed as a progress indicator.

    Args:
        container_id: Id of the container to extract.
        src_path: Path of the header-less usage CSV to scan.
        dst_dir: Output directory, including the trailing separator.
        chunksize: Number of rows read per chunk.

    Returns:
        None. The matching rows are written to disk.
    """
    columns = ['container_id', 'machine_id', 'time_stamp', 'cpu_util_percent',
               'mem_util_percent', 'cpi', 'mem_gps', 'mpki', 'net_in', 'net_out',
               'disk_io_percent']
    pd.set_option('display.max_columns', None)
    container_usage = pd.read_csv(src_path, names=columns, chunksize=chunksize)
    dst_path = dst_dir + "container_" + container_id + ".csv"

    for i, df in enumerate(container_usage, start=1):
        # Exact comparison: str.contains() is a substring/regex match and
        # would also pick up ids that merely contain the wanted id.
        df_1 = df.loc[df['container_id'] == container_id, :]
        # mode='a' is required so that every chunk adds to the same file.
        # NOTE: for the same reason a second run appends to the existing file.
        df_1.to_csv(dst_path, index=False, header=False, mode='a')
        print(i)
上面的df是: