仅供学习使用。
程序
将原始的 COCO 格式数据集(标注为矩形框)通过 openslide 和 numpy 等工具包进行图片切割。原始图像大小不规则,约为 100000x100000 像素;运行下面的程序即可得到 2000x2000 像素的切片图像及其对应的标注文件,切片尺寸、路径等需要自行调整。
import openslide
from openslide.deepzoom import DeepZoomGenerator
import numpy as np
import json
from tqdm import tqdm
import multiprocessing
from concurrent.futures import ThreadPoolExecutor
import threading
import multiprocessing
# Load the COCO-style annotation file once at import time.  The context
# manager closes the handle as soon as the JSON has been parsed, and the
# explicit encoding keeps the result independent of the platform default.
with open(r'D:\迅雷下载\核变裂象\mitos_wsi_ccmct_heael.json', 'r', encoding='utf-8') as label_file:
    label = json.load(label_file)
IMAGE_SIZE = (2000, 2000)  # (width, height) of one tile, in level-0 pixels
CLASS = label['categories']
# print(CLASS)
# print(label.keys())
# print('总共%d张图像,第一张图片信息'%len(label['images']),label['images'][0]) #32张图片
# print('总共%d个类别,第一个类别信息:'%len(label['categories']),label['categories'][0]) #七个类别
# print('标注数量:',len(label['annotations']))
# print('第一个标注信息',label['annotations'][0])
def get_bbox_class_per_image(label):
    """Group annotation boxes by image, appending the category id to each box.

    Args:
        label: COCO-style dict with ``'images'`` (each entry has an ``'id'``)
            and ``'annotations'`` (each entry has ``'image_id'``,
            ``'category_id'`` and ``'bbox'``).  The dict is not modified.

    Returns:
        A list with one integer ``numpy`` array per entry of
        ``label['images']``, in the same order.  Each row holds the
        annotation's ``bbox`` values followed by its ``category_id``.  An
        image without annotations yields an empty array of shape ``(0, 5)``.

    Raises:
        ValueError: If an annotation references an ``image_id`` that is not
            listed in ``label['images']``.
    """
    # Map image id -> position once instead of scanning the id list for
    # every annotation; setdefault keeps the first position if an id repeats.
    slot_of = {}
    for position, image in enumerate(label['images']):
        slot_of.setdefault(image['id'], position)

    per_image = [[] for _ in label['images']]
    for annotation in label['annotations']:
        category_id = annotation['category_id']
        if not category_id:
            # Annotations with a falsy category id (0 / None) are skipped.
            continue
        image_id = annotation['image_id']
        if image_id not in slot_of:
            raise ValueError(f'{image_id!r} is not in list')
        # Build a fresh row rather than appending to annotation['bbox'], so
        # the caller's data is untouched and repeated calls agree.
        row = list(annotation['bbox']) + [category_id]
        per_image[slot_of[image_id]].append(row)

    # The (0, 5) shape for empty images assumes 4 bbox values + 1 class id,
    # which keeps column indexing such as arr[:, 2] valid downstream.
    return [
        np.array(rows, dtype='int') if rows else np.empty((0, 5), dtype='int')
        for rows in per_image
    ]
# print(get_bbox_class_per_image(label))
def get_image_label(label, box):
    """Cut each slide in ``label`` into tiles and save the annotated ones.

    Every slide is walked at level 0 in a grid of ``IMAGE_SIZE`` tiles
    (partial tiles at the right/bottom edge are not visited).  A tile is
    written only when at least one box reaches ``margin`` pixels into it;
    its boxes are shifted to tile coordinates, clipped to the tile and saved
    as ``./output/ann/<id-1>_<col>_<row>.txt`` next to the tile image
    ``./output/img/<id-1>_<col>_<row>.png``.

    Args:
        label: COCO-style dict; each ``label['images']`` entry needs ``'id'``
            and ``'file_name'``.
        box: Per-image box arrays in the order of ``label['images']``, one
            row per box.  Columns 0-3 are read as xmin, ymin, xmax, ymax in
            level-0 pixels; further columns (the class id) are saved as-is.

    Returns:
        Always ``0``.
    """
    import os  # function-scope import keeps this block self-contained

    tile_w, tile_h = IMAGE_SIZE
    margin = 25  # minimum overlap, in pixels, for a box to count for a tile
    os.makedirs('./output/ann', exist_ok=True)
    os.makedirs('./output/img', exist_ok=True)

    image_list = [image['id'] for image in label['images']]
    for image in label['images']:
        image_id = image['id']
        index = image_list.index(image_id)  # looked up once per slide
        boxes = np.asarray(box[index])
        if boxes.ndim != 2 or boxes.shape[0] == 0:
            # Without boxes no tile would be written, and column indexing
            # below would fail on a 1-D empty array.
            print('skip id:%d image (no boxes)' % index)
            continue

        path = r'C:\Users\DELL\Desktop\data' + '\\' + image['file_name']
        slide = openslide.open_slide(path)
        try:
            w, h = slide.level_dimensions[0]
            cols, rows = w // tile_w, h // tile_h
            print('strat id:%d image' % index)
            with tqdm(total=cols * rows, desc=f'process {index/len(image_list)}', mininterval=0.3) as pbar:
                for i in range(cols):
                    left = i * tile_w
                    # The x overlap only depends on the column, so compute
                    # it once per column.
                    hit_x = np.logical_and(boxes[:, 2] >= left + margin,
                                           boxes[:, 0] <= left + tile_w - margin)
                    for j in range(rows):
                        top = j * tile_h
                        hit_y = np.logical_and(boxes[:, 3] >= top + margin,
                                               boxes[:, 1] <= top + tile_h - margin)
                        mask = np.logical_and(hit_x, hit_y)
                        if mask.any():
                            # Boolean indexing copies, so the caller's boxes
                            # are not shifted.
                            tile_boxes = boxes[mask]
                            # Shift/clip coordinates only; the class column
                            # must not be clipped.
                            tile_boxes[:, 0:4:2] = np.clip(tile_boxes[:, 0:4:2] - left, 0, tile_w)
                            tile_boxes[:, 1:4:2] = np.clip(tile_boxes[:, 1:4:2] - top, 0, tile_h)
                            stem = '%s_%s_%s' % (str(image_id - 1), str(i), str(j))
                            np.savetxt('./output/ann/%s.txt' % stem, tile_boxes, fmt='%d')
                            img = slide.read_region((left, top), 0, (tile_w, tile_h))
                            img.save('./output/img/%s.png' % stem)
                        # Count every visited tile, written or not.
                        pbar.update(1)
        finally:
            slide.close()
        print('finish id:%d image' % index)
        print('\n')
    return 0
if __name__ == '__main__':
    # Script entry point: collect the per-image box arrays, report how many
    # boxes the first image has, then run the tiling pass over every slide.
    bboxes = get_bbox_class_per_image(label)
    # Debug aid: np.max(bboxes[13], axis=0) shows the per-column maxima
    # (wmin, hmin, wmax, hmax).
    first_image_boxes = bboxes[0]
    print(len(first_image_boxes))
    get_image_label(label, bboxes)
下面是数据验证的程序以及数据验证的结果:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Draw the saved boxes of one tile on top of its image to check the cut.
ann = r'D:\PycharmProjects\simple_skills\切割图像\output\ann\0_9_25.txt'
img = r'D:\PycharmProjects\simple_skills\切割图像\output\img\0_9_25.png'

# One box per line, values separated by whitespace; blank lines are ignored.
with open(ann, 'r') as f:
    rows = [line.split() for line in f if line.strip()]
a = np.array(rows, dtype='int')

pixels = plt.imread(img)
if pixels.dtype != np.uint8:
    # matplotlib returns PNG data as floats in [0, 1]; OpenCV drawing needs
    # an 8-bit image (a platform-dependent 'int' can end up as int64, which
    # cv2.rectangle rejects).
    pixels = (pixels * 255).round().astype(np.uint8)
if pixels.ndim == 3 and pixels.shape[2] == 4:
    # Drop the alpha channel: with RGBA data a 3-value colour leaves alpha at
    # 0, so the rectangles would be drawn fully transparent.
    pixels = pixels[:, :, :3]
canvas = np.ascontiguousarray(pixels)  # OpenCV draws in place on contiguous data

for row in a:
    # OpenCV expects plain int tuples for the corner points.
    x_min, y_min, x_max, y_max = (int(v) for v in row[:4])
    cv2.rectangle(canvas, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
plt.imshow(canvas)
plt.show()
总结
老老实实地迈好每一步,相信成功就在你面前。甘愿为理想“头破血流”。
附录
多进程操作(使用 multiprocessing.Pool 并行处理各张切片)
import openslide
from openslide.deepzoom import DeepZoomGenerator
import numpy as np
import json
from tqdm import tqdm
import multiprocessing
from concurrent.futures import ThreadPoolExecutor
import threading
import multiprocessing
# label = open(r'D:\迅雷下载\核变裂象\mitos_wsi_ccmct_heael.json','r')
# label = json.load(label)
# IMAGE_SIZE = (scale,scale)
# CLASS = label['categories']
# print(CLASS)
# print(label.keys())
# print('总共%d张图像,第一张图片信息'%len(label['images']),label['images'][0]) #32张图片
# print('总共%d个类别,第一个类别信息:'%len(label['categories']),label['categories'][0]) #七个类别
# print('标注数量:',len(label['annotations']))
# print('第一个标注信息',label['annotations'][0])
def get_bbox_class_per_image(label):
    """Group annotation boxes by image, appending the category id to each box.

    Args:
        label: COCO-style dict with ``'images'`` (each entry has an ``'id'``)
            and ``'annotations'`` (each entry has ``'image_id'``,
            ``'category_id'`` and ``'bbox'``).  The dict is not modified.

    Returns:
        A list with one integer ``numpy`` array per entry of
        ``label['images']``, in the same order.  Each row holds the
        annotation's ``bbox`` values followed by its ``category_id``.  An
        image without annotations yields an empty array of shape ``(0, 5)``.

    Raises:
        ValueError: If an annotation references an ``image_id`` that is not
            listed in ``label['images']``.
    """
    # Map image id -> position once instead of scanning the id list for
    # every annotation; setdefault keeps the first position if an id repeats.
    slot_of = {}
    for position, image in enumerate(label['images']):
        slot_of.setdefault(image['id'], position)

    per_image = [[] for _ in label['images']]
    for annotation in label['annotations']:
        category_id = annotation['category_id']
        if not category_id:
            # Annotations with a falsy category id (0 / None) are skipped.
            continue
        image_id = annotation['image_id']
        if image_id not in slot_of:
            raise ValueError(f'{image_id!r} is not in list')
        # Build a fresh row rather than appending to annotation['bbox'], so
        # the caller's data is untouched and repeated calls agree.
        row = list(annotation['bbox']) + [category_id]
        per_image[slot_of[image_id]].append(row)

    # The (0, 5) shape for empty images assumes 4 bbox values + 1 class id,
    # which keeps column indexing such as arr[:, 2] valid downstream.
    return [
        np.array(rows, dtype='int') if rows else np.empty((0, 5), dtype='int')
        for rows in per_image
    ]
# print(get_bbox_class_per_image(label))
def get_image_label(image, id, image_list, box, IMAGE_SIZE, scale):
    """Cut one slide into ``scale`` x ``scale`` tiles and save the annotated ones.

    The slide is walked at level 0.  A tile is written only when at least one
    box reaches ``margin`` pixels into it; its boxes are shifted to tile
    coordinates, clipped to the tile and saved as
    ``<id-1>_<col>_<row>.txt`` / ``.png`` in the two output folders.

    Args:
        image: Image record; ``image['file_name']`` names the slide file.
        id: Id of ``image``; must occur in ``image_list``.  ``id - 1`` is
            used as the file-name prefix.
        image_list: Image ids; the position of ``id`` selects the entry of
            ``box`` that belongs to this slide.
        box: Per-image box arrays, one row per box.  Columns 0-3 are read as
            xmin, ymin, xmax, ymax in level-0 pixels; further columns (the
            class id) are saved as-is.
        IMAGE_SIZE: ``(width, height)`` used only to count how many tiles fit
            into the slide.
        scale: Tile side length used for tile origins, box clipping and the
            region that is read.  NOTE(review): the tile grid is only
            consistent when ``IMAGE_SIZE == (scale, scale)``, which is what
            the visible caller passes.
    """
    import os  # function-scope import keeps this block self-contained

    margin = 25  # minimum overlap, in pixels, for a box to count for a tile
    ann_dir = r'D:\output\Annotations_txt_mel'
    img_dir = r'D:\output\img_mel'
    # exist_ok makes this safe when several workers start at once.
    os.makedirs(ann_dir, exist_ok=True)
    os.makedirs(img_dir, exist_ok=True)

    index = image_list.index(id)  # looked up once instead of per tile
    boxes = np.asarray(box[index])
    if boxes.ndim != 2 or boxes.shape[0] == 0:
        # Without boxes no tile would be written, and column indexing below
        # would fail on a 1-D empty array.
        print('skip id:%d image (no boxes)' % index)
        return

    path = r'D:\data' + '\\' + image['file_name']
    slide = openslide.open_slide(path)
    try:
        w, h = slide.level_dimensions[0]
        print(w, h)
        cols, rows = w // IMAGE_SIZE[0], h // IMAGE_SIZE[1]
        print('strat id:%d image' % index)
        with tqdm(total=cols * rows, desc=f'process {index/len(image_list)}', mininterval=0.3) as pbar:
            for i in range(cols):
                left = i * scale
                # The x overlap only depends on the column, so compute it
                # once per column.
                hit_x = np.logical_and(boxes[:, 2] >= left + margin,
                                       boxes[:, 0] <= left + scale - margin)
                for j in range(rows):
                    top = j * scale
                    hit_y = np.logical_and(boxes[:, 3] >= top + margin,
                                           boxes[:, 1] <= top + scale - margin)
                    mask = np.logical_and(hit_x, hit_y)
                    if mask.any():
                        # Boolean indexing copies, so the shared box array is
                        # not shifted.
                        tile_boxes = boxes[mask]
                        # Shift/clip coordinates only; the class column must
                        # not be clipped.
                        tile_boxes[:, 0:4:2] = np.clip(tile_boxes[:, 0:4:2] - left, 0, scale)
                        tile_boxes[:, 1:4:2] = np.clip(tile_boxes[:, 1:4:2] - top, 0, scale)
                        np.savetxt(r'D:\output\Annotations_txt_mel\%s_%s_%s.txt' % (str(id - 1), str(i), str(j)), tile_boxes, fmt='%d')
                        img = slide.read_region((left, top), 0, (scale, scale))
                        img.save(r'D:\output\img_mel\%s_%s_%s.png' % (str(id - 1), str(i), str(j)))
                    # Count every visited tile, written or not.
                    pbar.update(1)
    finally:
        slide.close()
    print('finish id:%d image' % index)
    print('\n')
def print_error(value):
    """Write ``value`` to stdout behind an ``error: `` label."""
    prefix = "error: "
    print(prefix, value)
if __name__ == '__main__':
    # Entry point: load the annotations, then tile every slide in a pool of
    # worker processes (one task per slide).
    print('开始运行主线程')
    multiprocessing.freeze_support()

    # Load everything before the pool exists so a bad path or bad JSON does
    # not leave idle worker processes behind.
    # NOTE(review): the file name looks mistyped ('.mitos_wsi_ccmct_meljson',
    # perhaps meant to end in '_mel.json') — confirm against the real file.
    with open(r'D:\迅雷下载\核变裂象\.mitos_wsi_ccmct_meljson', 'r', encoding='utf-8') as label_file:
        label = json.load(label_file)
    scale = 640
    IMAGE_SIZE = (scale, scale)  # kept in step with scale; the worker uses both
    CLASS = label['categories']
    bboxes = get_bbox_class_per_image(label)
    # Debug aid: np.max(bboxes[13], axis=0) shows the per-column maxima
    # (wmin, hmin, wmax, hmax).
    if bboxes:
        print(len(bboxes[0]))
    image_list = [image['id'] for image in label['images']]
    print('开始运行子线程')
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        for image in label['images']:
            # error_callback (not callback) is what receives a worker's
            # exception; callback only fires with the result on success.
            pool.apply_async(
                func=get_image_label,
                args=[image, image['id'], image_list, bboxes, IMAGE_SIZE, scale],
                error_callback=print_error,
            )
        # close() + join() wait for all tasks; leaving the with-block alone
        # would terminate the workers immediately.
        pool.close()
        pool.join()
    print('主线程运行结束')