更新!感谢ONE_SIX_MIX 的分享。我找到了python写SVS文件的方法。
代码跑不通的可以直接作用:
import time
import numpy as np
from math import ceil
import tifffile
from tqdm import tqdm
import openslide
import cv2
# 设置tile的大小。不想设置的太小,所以是512
TILE_SIZE = 512
# 为了保证图像的整体大小是tile大小的整倍数,不然会出错。
def up_to512_manifi(hw):
return int(ceil(hw[0]/TILE_SIZE)*TILE_SIZE), int(ceil(hw[1]/TILE_SIZE)*TILE_SIZE)
def gen_patches_index(ori_size, *, img_size=224, stride = 224,keep_last_size = False):
"""
这个函数用来按照输入的size和patch大小,生成每个patch所在原始的size上的位置
keep_last_size:表示当size不能整除patch的size的时候,最后一个patch要不要保持输入的img_size
返回:
一个np数组,每个成员表示当前patch所在的x和y的起点和终点如:
[[x_begin,x_end,y_begin,y_end],...]
"""
height, width = ori_size[:2]
index = []
if height<img_size or width<img_size:
warn("input size is ({} {}), small than img_size:{}".format(height, width, img_size))
return index
for h in range(0, height+1, stride):
xe = h+img_size
if h+img_size>height:
xe = height
h = xe-img_size if keep_last_size else h
for w in range(0, width+1, stride):
ye = w+img_size
if w+img_size>width:
ye = width
w = ye-img_size if keep_last_size else w
index.append(np.array([h, xe, w, ye]))
if ye==width:
break
if xe==height:
break
return index
# 构建图像的迭代器。这是因为这个库要求输入是一个迭代器。每次迭代器输出一个tile的图像。
def gen_im(wsi, index):
ind = 0
while True:
_ind = index[ind]
temp_img = wsi[ind[0]:ind[1], ind[2]:ind[3]]
ind+=1
yield temp_img
# 获取每个tile的索引输出是索引的数组。可以通过img = image[ind[0]:ind[1], ind[2]:ind[3]]的形式获取单个tile图像
def gen_patches_index(ori_size, *, img_size=224, stride = 224,keep_last_size = False):
height, width = ori_size[:2]
index = []
if height<img_size or width<img_size:
warn("input size is ({} {}), small than img_size:{}".format(height, width, img_size))
return index
for h in range(0, height+1, stride):
xe = h+img_size
if h+img_size>height:
xe = height
h = xe-img_size if keep_last_size else h
for w in range(0, width+1, stride):
ye = w+img_size
if w+img_size>width:
ye = width
w = ye-img_size if keep_last_size else w
index.append(np.array([h, xe, w, ye]))
if ye==width:
break
if xe==height:
break
return index
def gen_pyramid_tiff(image , out_file):
svs_desc = 'Aperio Image Library Fake\nABC |AppMag = {mag}|Filename = {filename}|MPP = {mpp}'
label_desc = 'Aperio Image Library Fake\nlabel {W}x{H}'
macro_desc = 'Aperio Image Library Fake\nmacro {W}x{H}'
odata = openslide.open_slide(in_file)
# 指定mpp值
mpp = 0.25
# 指定缩放倍率
mag = 40
# 换算mpp值到分辨率
resolution = [10000 / mpp, 10000 / mpp, 'CENTIMETER']
# 指定图像名字
filename = odata.properties['aperio.Filename']
# tile 大小.tiff图像是分成多个tile存储的,所以需要设置我们想要的tile大小
tile_hw = np.int64([TILE_SIZE, TILE_SIZE])
width, height = image.shape[0:2]
# 要需要的金字塔分辨率
multi_hw = np.int64([(width, height), (width//2, height//2),
(width//4, height//4), (width//8, height//8),
(width//16, height//16),
(width//32, height//32),
(width//64, height//64)])
# 尝试写入 svs 格式
with tifffile.TiffWriter(out_file, bigtiff=True) as tif:
thw = tile_hw.tolist()
# outcolorspace 要保持为默认的 YCbCr,不能使用rgb,否则颜色会异常
# 95 是默认JPEG质量,值域是 0-100,值越大越接近无损
compression = ['JPEG', 90, dict(outcolorspace='YCbCr')]
kwargs = dict(subifds=0, photometric='rgb', planarconfig='CONTIG', compression=compression, dtype=np.uint8, metadata=None)
for i, hw in enumerate(multi_hw):
hw = up_to16_manifi(hw)
temp_wsi = cv2.resize(image, (hw[1], hw[0]))
# 将需要存入的图像扩展到TILE_SIZE的倍数
new_x, new_y = up_to512_manifi(hw)
new_wsi = np.ones((new_x, new_y, 3),dtype=np.uint8)*255
new_wsi[0:hw[0], 0:hw[1],:] = temp_wsi
# 设置每个tile的索引
index = gen_patches_index((new_x, new_y),img_size=TILE_SIZE,stride=TILE_SIZE)
# 构建图像的迭代器。
gen = gen_im(new_wsi, index)
if i == 0:
desc = svs_desc.format(mag=mag, filename=filename, mpp=mpp)
# tile是每个小块的大小,它的长宽必须能被16整除
tif.write(data=gen, shape=(*hw, 3), tile=thw[::-1], resolution=resolution, description=desc, **kwargs)
else:
tif.write(data=gen, shape=(*hw, 3), tile=thw[::-1], resolution=resolution, description='', **kwargs)
# img是一个npmpy图像(h,w,3),save_path是存储的路径 .../a.svs
gen_pyramid_tiff(img, save_path)
以下是原文:
今天对病理图像的读写进行一个小介绍。先说结论,如果不需要读很小倍率的图像,pyvips比openslide快很多,写SVS的话用matlab。
病理图像的存储有很多种格式,可以看如下图:
常见的是.svs, .tif,.mrxs。今天就只讲.svs和.tif。.mrxs手上没有数据。但依然可以用openslide读取,以后有机会再展开。
在python上读取.svs和.tif的方式是一样的,因为.svs本质上就是一个tif,采用的数据格式和压缩方式都是一样的。示意图图下:
简单的读取可以如下:
from openslide import OpenSlide
filePath = 'file.tif'
slide = OpenSlide(filePath)
#查看文件的金字塔结构,从中选取需要读取的层。
print(slide.level_dimensions)
# 输出结果如下:
# ((90624, 214528), (45312, 107264), (22656, 53632), (11328, 26816), (5664, 13408), (2832, 6704), (1416, 3352), (708, 1676), (354, 838), (177, 419))
#假如说需要读取40X的层,那就是第0层,代码如下:
img_40X = np.array(location=slide.read_region((0,0),level=0,size=slide.level_dimensions[0]),dtype = np.uint8)[:,:,0:3]
# level表示要读的层数,我们可以从slide.level_dimensions查看需要的层
# location表示读取时图像的起始坐标,需要注意的是,不管我们读取哪个level的图像,这个点都填的是40X的坐标。
#比如说我们读取40X下起始点是(100,100)终点是(324,324)大小的patch,那location=(100,100),size=(224,224),level=0
img_40X_small = np.array(location=slide.read_region((100,100),level=0,size=(324,324)),dtype = np.uint8)[:,:,0:3]
#如果需要读取10X的一样视野的图像那个,size就应该是(224//4,224//4), level=2, 但location依然是(100,100)
img_10X_small = np.array(location=slide.read_region((100,100),level=2,size=(224//4,224//4)),dtype = np.uint8)[:,:,0:3]
location的好处是允许我们自定义读取的起始点,有时候我们需要排除wsi中的空白,就可以根据所画的ROI来决定起始点和size。
另一种方式是使用pyvips来读取,这个就简单很多:
import pyvips
svs_file = 'file.svs'
# level 表示要读取的层数
img = pyvips.Image.new_from_file(svs_file, level=0)
# crop(起始点x,起始点y,长,宽)
img2 = img.crop(x1, y1, h, w)
img2 = np.asarray(img2, dtype=np.uint8)
pyvips的读取速度要比openslide快很多,如果不是需要读取level很高的图像,我都推荐用pyvips。我用工作站读取了一个(49821, 93298)大小的WSI,openslide花费162.94秒,pyvips花费42.79秒。越大的图像pyvips就会越快。
如果要写tif的话,我个人推荐用matlab。在python上写多层的tif文件我没有成功过,可能是因为python对tif的支持不好。写的代码如下:
function writesvs(imgdata, file_name)
size2 = fix(size(imgdata));
img_fist = imresize(imgdata, [size2(1) size2(2)]);
size2 = fix(size(imgdata)/2);
img_half = imresize(imgdata, [size2(1) size2(2)]);
%
size3 = fix(size(imgdata)/4);
img_third = imresize(imgdata, [size3(1) size3(2)]);
size4 = fix(size(imgdata)/8);
img_four = imresize(imgdata, [size4(1) size4(2)]);
%img_four = imgdata(1:64:end,1:64:end,:);
t = Tiff(file_name,'w');
%写40X的图像
tagstruct.ImageDescription = "Aperio Image Library |AppMag = 40|MPP = 0.265018";
tagstruct.ImageLength = size(img_fist ,1);
tagstruct.ImageWidth = size(img_fist ,2);
tagstruct.Photometric = Tiff.Photometric.RGB;
tagstruct.BitsPerSample = 8;
tagstruct.SamplesPerPixel = 3;
tagstruct.RowsPerStrip = 16;
tagstruct.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
tagstruct.Software = 'MATLAB';
tagstruct.TileWidth = 240;
tagstruct.TileLength = 240;
tagstruct.Compression = 7;
tagstruct.JPEGQuality = 80;
setTag(t,tagstruct)
write(t,img_fist );
writeDirectory(t);
%写20X的图像
tagstruct2.ImageDescription = "Aperio Image Library |AppMag = 20|MPP = 0.51";
tagstruct2.ImageLength = size(img_half,1);
tagstruct2.ImageWidth = size(img_half,2);
tagstruct2.Photometric = Tiff.Photometric.RGB;
tagstruct2.BitsPerSample = 8;
tagstruct2.SamplesPerPixel = 3;
tagstruct2.RowsPerStrip = 16;
tagstruct2.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
tagstruct2.Software = 'MATLAB';
tagstruct2.TileWidth = 240;
tagstruct2.TileLength = 240;
tagstruct2.Compression = 7;
tagstruct2.JPEGQuality = 80;
setTag(t,tagstruct2)
write(t,img_half);
writeDirectory(t);
%写10X的图像
tagstruct3.ImageDescription = "Aperio Image Library";
tagstruct3.ImageLength = size(img_third,1);
tagstruct3.ImageWidth = size(img_third,2);
tagstruct3.Photometric = Tiff.Photometric.RGB;
tagstruct3.BitsPerSample = 8;
tagstruct3.SamplesPerPixel = 3;
tagstruct3.RowsPerStrip = 16;
tagstruct3.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
tagstruct3.Software = 'MATLAB';
tagstruct3.TileWidth = 240;
tagstruct3.TileLength = 240;
tagstruct3.Compression = 7;
tagstruct3.JPEGQuality = 80;
setTag(t,tagstruct3)
write(t,img_third);
writeDirectory(t);
%写5X的图像
tagstruct4.ImageDescription = "Aperio Image Library";
tagstruct4.ImageLength = size(img_four,1);
tagstruct4.ImageWidth = size(img_four,2);
tagstruct4.Photometric = Tiff.Photometric.RGB;
tagstruct4.BitsPerSample = 8;
tagstruct4.SamplesPerPixel = 3;
tagstruct4.RowsPerStrip = 16;
tagstruct4.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
tagstruct4.Software = 'MATLAB';
tagstruct4.TileWidth = 240;
tagstruct4.TileLength = 240;
tagstruct4.Compression = 7;
tagstruct4.JPEGQuality = 80;
setTag(t,tagstruct4)
write(t,img_four);
close(t);
end