（病理图像读写）病理图像（whole slide images，WSI）的读写（.svs, .tiff）,使用openslide,和pyvips以及matlab

水博的两年半

已于 2024-01-02 13:28:47 修改

阅读量1.2w

点赞数 11

分类专栏：医疗图像处理病理图像处理文章标签： matlab python 开发语言病理图像读写 openslide

于 2022-08-02 10:59:59 首次发布

本文链接：https://blog.csdn.net/qq_34616741/article/details/126116770

版权

医疗图像处理同时被 2 个专栏收录

7 篇文章

订阅专栏

病理图像处理

3 篇文章

订阅专栏

这篇博客介绍了如何使用Python将mrxs文件转换为svs文件，并探讨了在病理图像处理中，pyvips和openslide库的读取速度差异。作者提供了一段代码，用于构建图像金字塔并保存为svs格式，同时提到了matlab在写tif文件时的优势。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

更新！感谢ONE_SIX_MIX 的分享。我找到了python写SVS文件的方法。

代码跑不通的可以直接作用：

mrxs转svs

import time
import numpy as np
from math import ceil
import tifffile
from tqdm import tqdm
import openslide
import cv2

# 设置tile的大小。不想设置的太小，所以是512
TILE_SIZE = 512
# 为了保证图像的整体大小是tile大小的整倍数，不然会出错。
def up_to512_manifi(hw):
    return int(ceil(hw[0]/TILE_SIZE)*TILE_SIZE), int(ceil(hw[1]/TILE_SIZE)*TILE_SIZE)
def gen_patches_index(ori_size, *, img_size=224, stride = 224,keep_last_size = False):

    """
        这个函数用来按照输入的size和patch大小，生成每个patch所在原始的size上的位置

        keep_last_size：表示当size不能整除patch的size的时候，最后一个patch要不要保持输入的img_size
        
        返回：
            一个np数组，每个成员表示当前patch所在的x和y的起点和终点如：
                [[x_begin,x_end,y_begin,y_end],...]
    """
    height, width = ori_size[:2]
    index = []
    if height<img_size or width<img_size: 
        warn("input size is ({} {}), small than img_size:{}".format(height, width, img_size))
        return index
        
    for h in range(0, height+1, stride):
        xe = h+img_size
        if h+img_size>height:
            xe = height
            h = xe-img_size if keep_last_size else h

        for w in range(0, width+1, stride):
            ye = w+img_size
            if w+img_size>width:
                ye = width
                w = ye-img_size if keep_last_size else w
            index.append(np.array([h, xe, w, ye]))

            if ye==width:
                break
        if xe==height:
            break
    return index
# 构建图像的迭代器。这是因为这个库要求输入是一个迭代器。每次迭代器输出一个tile的图像。
def gen_im(wsi, index):
    ind = 0
    while True:
    	_ind = index[ind]
        temp_img = wsi[ind[0]:ind[1], ind[2]:ind[3]]
        ind+=1
        yield temp_img
# 获取每个tile的索引输出是索引的数组。可以通过img = image[ind[0]:ind[1], ind[2]:ind[3]]的形式获取单个tile图像
def gen_patches_index(ori_size, *, img_size=224, stride = 224,keep_last_size = False):
    height, width = ori_size[:2]
    index = []
    if height<img_size or width<img_size: 
        warn("input size is ({} {}), small than img_size:{}".format(height, width, img_size))
        return index
        
    for h in range(0, height+1, stride):
        xe = h+img_size
        if h+img_size>height:
            xe = height
            h = xe-img_size if keep_last_size else h
        for w in range(0, width+1, stride):
            ye = w+img_size
            if w+img_size>width:
                ye = width
                w = ye-img_size if keep_last_size else w
            index.append(np.array([h, xe, w, ye]))

            if ye==width:
                break
        if xe==height:
            break
    return index

def gen_pyramid_tiff(image , out_file):
    svs_desc = 'Aperio Image Library Fake\nABC |AppMag = {mag}|Filename = {filename}|MPP = {mpp}'
    label_desc = 'Aperio Image Library Fake\nlabel {W}x{H}'
    macro_desc = 'Aperio Image Library Fake\nmacro {W}x{H}'
    odata = openslide.open_slide(in_file)
    # 指定mpp值
    mpp = 0.25
    # 指定缩放倍率
    mag = 40
    # 换算mpp值到分辨率
    resolution = [10000 / mpp, 10000 / mpp, 'CENTIMETER']
    # 指定图像名字
    filename = odata.properties['aperio.Filename']

    # tile 大小.tiff图像是分成多个tile存储的，所以需要设置我们想要的tile大小
    tile_hw = np.int64([TILE_SIZE, TILE_SIZE])
    width, height = image.shape[0:2]
    # 要需要的金字塔分辨率
    multi_hw = np.int64([(width, height), (width//2, height//2), 
                         (width//4, height//4), (width//8, height//8),
                         (width//16, height//16),
                         (width//32, height//32),
                         (width//64, height//64)])

    # 尝试写入 svs 格式
    with tifffile.TiffWriter(out_file, bigtiff=True) as tif:
        thw = tile_hw.tolist()
        # outcolorspace 要保持为默认的 YCbCr，不能使用rgb，否则颜色会异常
        # 95 是默认JPEG质量，值域是 0-100，值越大越接近无损
        compression = ['JPEG', 90, dict(outcolorspace='YCbCr')]
        kwargs = dict(subifds=0, photometric='rgb', planarconfig='CONTIG', compression=compression, dtype=np.uint8, metadata=None)

        for i, hw in enumerate(multi_hw):
            hw =  up_to16_manifi(hw)
            temp_wsi = cv2.resize(image, (hw[1], hw[0]))
            # 将需要存入的图像扩展到TILE_SIZE的倍数
            new_x, new_y = up_to512_manifi(hw)
            new_wsi = np.ones((new_x, new_y, 3),dtype=np.uint8)*255
            new_wsi[0:hw[0], 0:hw[1],:] = temp_wsi
            # 设置每个tile的索引
            index = gen_patches_index((new_x, new_y),img_size=TILE_SIZE,stride=TILE_SIZE)
            # 构建图像的迭代器。
            gen = gen_im(new_wsi, index)

            if i == 0:
                desc = svs_desc.format(mag=mag, filename=filename, mpp=mpp)
                # tile是每个小块的大小，它的长宽必须能被16整除
                tif.write(data=gen, shape=(*hw, 3), tile=thw[::-1], resolution=resolution, description=desc, **kwargs)
            else:
                tif.write(data=gen, shape=(*hw, 3), tile=thw[::-1], resolution=resolution, description='', **kwargs)

# img是一个npmpy图像(h,w,3)，save_path是存储的路径 .../a.svs
gen_pyramid_tiff(img, save_path)

以下是原文：
今天对病理图像的读写进行一个小介绍。先说结论，如果不需要读很小倍率的图像，pyvips比openslide快很多，写SVS的话用matlab。

病理图像的存储有很多种格式，可以看如下图：
在这里插入图片描述

常见的是.svs, .tif，.mrxs。今天就只讲.svs和.tif。.mrxs手上没有数据。但依然可以用openslide读取，以后有机会再展开。
在python上读取.svs和.tif的方式是一样的，因为.svs本质上就是一个tif，采用的数据格式和压缩方式都是一样的。示意图图下：
在这里插入图片描述

简单的读取可以如下：

from openslide import OpenSlide
filePath = 'file.tif'
slide   = OpenSlide(filePath)
#查看文件的金字塔结构，从中选取需要读取的层。
print(slide.level_dimensions)
# 输出结果如下：
# ((90624, 214528), (45312, 107264), (22656, 53632), (11328, 26816), (5664, 13408), (2832, 6704), (1416, 3352), (708, 1676), (354, 838), (177, 419))
#假如说需要读取40X的层，那就是第0层，代码如下：

img_40X = np.array(location=slide.read_region((0,0),level=0,size=slide.level_dimensions[0]),dtype = np.uint8)[:,:,0:3]
# level表示要读的层数，我们可以从slide.level_dimensions查看需要的层
# location表示读取时图像的起始坐标，需要注意的是，不管我们读取哪个level的图像，这个点都填的是40X的坐标。
#比如说我们读取40X下起始点是(100,100)终点是(324,324)大小的patch，那location=(100,100)，size=(224,224)，level=0
img_40X_small = np.array(location=slide.read_region((100,100),level=0,size=(324,324)),dtype = np.uint8)[:,:,0:3]
#如果需要读取10X的一样视野的图像那个，size就应该是(224//4,224//4), level=2, 但location依然是(100,100)
img_10X_small = np.array(location=slide.read_region((100,100),level=2,size=(224//4,224//4)),dtype = np.uint8)[:,:,0:3]

location的好处是允许我们自定义读取的起始点，有时候我们需要排除wsi中的空白，就可以根据所画的ROI来决定起始点和size。
另一种方式是使用pyvips来读取，这个就简单很多：

import pyvips
svs_file = 'file.svs'
# level 表示要读取的层数
img = pyvips.Image.new_from_file(svs_file, level=0)
# crop(起始点x，起始点y，长，宽)
img2 = img.crop(x1, y1, h, w)
img2 = np.asarray(img2, dtype=np.uint8)

pyvips的读取速度要比openslide快很多，如果不是需要读取level很高的图像，我都推荐用pyvips。我用工作站读取了一个（49821， 93298）大小的WSI，openslide花费162.94秒，pyvips花费42.79秒。越大的图像pyvips就会越快。

如果要写tif的话，我个人推荐用matlab。在python上写多层的tif文件我没有成功过，可能是因为python对tif的支持不好。写的代码如下：

function writesvs(imgdata, file_name)
    size2 = fix(size(imgdata));
    img_fist = imresize(imgdata, [size2(1) size2(2)]);
    
    size2 = fix(size(imgdata)/2);
    img_half = imresize(imgdata, [size2(1) size2(2)]);
    %
    size3 = fix(size(imgdata)/4);
    img_third = imresize(imgdata, [size3(1) size3(2)]);

    size4 = fix(size(imgdata)/8);
    img_four = imresize(imgdata, [size4(1) size4(2)]);
    %img_four = imgdata(1:64:end,1:64:end,:);

    t = Tiff(file_name,'w');
	%写40X的图像
	tagstruct.ImageDescription = "Aperio Image Library |AppMag = 40|MPP = 0.265018";
	tagstruct.ImageLength = size(img_fist ,1);
	tagstruct.ImageWidth = size(img_fist ,2);
	tagstruct.Photometric = Tiff.Photometric.RGB;
	tagstruct.BitsPerSample = 8;
	tagstruct.SamplesPerPixel = 3;
	tagstruct.RowsPerStrip = 16;
	tagstruct.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
	tagstruct.Software = 'MATLAB';
	tagstruct.TileWidth = 240;
	tagstruct.TileLength = 240;
	tagstruct.Compression = 7;
	tagstruct.JPEGQuality = 80;
	setTag(t,tagstruct)

	write(t,img_fist );
	writeDirectory(t);

	%写20X的图像
    tagstruct2.ImageDescription = "Aperio Image Library |AppMag = 20|MPP = 0.51";
    tagstruct2.ImageLength = size(img_half,1);
    tagstruct2.ImageWidth = size(img_half,2);
    tagstruct2.Photometric = Tiff.Photometric.RGB;
    tagstruct2.BitsPerSample = 8;
    tagstruct2.SamplesPerPixel = 3;
    tagstruct2.RowsPerStrip = 16;
    tagstruct2.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
    tagstruct2.Software = 'MATLAB';
    tagstruct2.TileWidth = 240;
    tagstruct2.TileLength = 240;
    tagstruct2.Compression = 7;
    tagstruct2.JPEGQuality = 80;
    setTag(t,tagstruct2)

    write(t,img_half);
    writeDirectory(t);
    
	%写10X的图像
    tagstruct3.ImageDescription = "Aperio Image Library";
    tagstruct3.ImageLength = size(img_third,1);
    tagstruct3.ImageWidth = size(img_third,2);
    tagstruct3.Photometric = Tiff.Photometric.RGB;
    tagstruct3.BitsPerSample = 8;
    tagstruct3.SamplesPerPixel = 3;
    tagstruct3.RowsPerStrip = 16;
    tagstruct3.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
    tagstruct3.Software = 'MATLAB';
    tagstruct3.TileWidth = 240;
    tagstruct3.TileLength = 240;
    tagstruct3.Compression = 7;
    tagstruct3.JPEGQuality = 80;
    setTag(t,tagstruct3)

    write(t,img_third);
    writeDirectory(t);
    
	%写5X的图像
    tagstruct4.ImageDescription = "Aperio Image Library";
    tagstruct4.ImageLength = size(img_four,1);
    tagstruct4.ImageWidth = size(img_four,2);
    tagstruct4.Photometric = Tiff.Photometric.RGB;
    tagstruct4.BitsPerSample = 8;
    tagstruct4.SamplesPerPixel = 3;
    tagstruct4.RowsPerStrip = 16;
    tagstruct4.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
    tagstruct4.Software = 'MATLAB';
    tagstruct4.TileWidth = 240;
    tagstruct4.TileLength = 240;
    tagstruct4.Compression = 7;
    tagstruct4.JPEGQuality = 80;
    setTag(t,tagstruct4)

    write(t,img_four);
    close(t);
end