In this notebook, we compare cuCIM with OpenSlide in a multi-thread/multi-process environment.
input/image2.tif
file (whose size is 92344x81017 and tile size is 256x256) is used.
Since cuCIM doesn't implement an internal cache yet, its performance depends on the start_location
variable in the experiment code.
For the first case (start_location = 0
), when we read the whole image starting from (0,0) with a 256x256 patch size, both OpenSlide and cuCIM read each tile only once.
However, in the second case (start_location = 1
), which starts reading patches from (1,1), cuCIM is at a disadvantage -- for the second patch (second red box), cuCIM needs to read four tiles whereas OpenSlide reads only two (the two tiles in the middle were already cached when OpenSlide read the first patch).
Note: cuCIM will support a cache mechanism in the near future.
The following system is used to conduct the experiment:
The following code expects that OpenSlide is installed on your system. Please execute the following commands to install it.
!conda install -c conda-forge openslide
or
!apt-get install --yes --fix-missing --no-install-recommends libopenslide0
!pip install openslide-python
#!conda install -c conda-forge openslide
# or
#!apt-get install --yes --fix-missing --no-install-recommends libopenslide0
#!pip install openslide-python
from contextlib import ContextDecorator
from time import perf_counter
class Timer(ContextDecorator):
    """Print the wall-clock time spent inside a ``with`` block.

    On exit, ``"<message> : <seconds>"`` is printed. If ``elapsed_time()``
    was called inside the block, the printed value is the duration up to the
    most recent call; otherwise it is the duration of the whole block.

    Because this derives from ``ContextDecorator``, one instance may be
    entered many times, so every entry starts a fresh measurement.
    """

    def __init__(self, message):
        """Store the label that prefixes the printed duration."""
        self.message = message
        self.end = None

    def elapsed_time(self):
        """Record "now" as the end point and return seconds since entry.

        Must be called after the timer has been entered.
        """
        self.end = perf_counter()
        return self.end - self.start

    def __enter__(self):
        self.start = perf_counter()
        # Forget any end point left over from a previous use of this
        # instance; otherwise __exit__ would report a stale, negative value.
        self.end = None
        return self

    def __exit__(self, exc_type, exc, exc_tb):
        # Only take a new reading when the caller did not already stop the
        # clock explicitly through elapsed_time().
        if self.end is None:
            self.elapsed_time()
        print("{} : {}".format(self.message, self.end - self.start))
import numpy as np
from openslide import OpenSlide
import concurrent.futures
from cucim import CuImage
import os
# Largest thread-pool size to benchmark. os.cpu_count() may return None when
# the count cannot be determined, so fall back to a single worker.
num_threads = os.cpu_count() or 1
# Image file read by both libraries.
input_file = "input/image2.tif"
# First x and y coordinate of the patch grid.
start_location = 0
# Edge length of each square patch; also the stride of the patch grid.
patch_size = 256
def load_tile_openslide(slide, start_loc, patch_size):
    """Read one square patch through an already-open OpenSlide handle.

    Calls ``slide.read_region(start_loc, 0, [patch_size, patch_size])``.
    The returned region is discarded and the function returns ``None``;
    only the cost of the read matters to the benchmark.
    """
    patch_dims = [patch_size, patch_size]
    slide.read_region(start_loc, 0, patch_dims)
def load_tile_cucim(slide, start_loc, patch_size):
    """Read one square patch through an already-open cuCIM handle.

    Calls ``slide.read_region(start_loc, [patch_size, patch_size], 0)``.
    The returned region is discarded and the function returns ``None``;
    only the cost of the read matters to the benchmark.
    """
    patch_dims = [patch_size, patch_size]
    slide.read_region(start_loc, patch_dims, 0)
# Multi-thread benchmark: for every pool size from 1 to num_threads, read the
# whole image patch by patch with OpenSlide and then with cuCIM, sharing one
# open handle between all threads of a pool.
openslide_tot_time = 0
cucim_tot_time = 0
for num_workers in range(1, num_threads + 1):
    print("# of thread : {}".format(num_workers))
    openslide_time = 0
    # (92344 x 81017)
    with OpenSlide(input_file) as slide:
        width, height = slide.dimensions

        # Lazily yields the (x, y) start of every patch, row by row.
        start_loc_iter = ((sx, sy)
                          for sy in range(start_location, height, patch_size)
                          for sx in range(start_location, width, patch_size))
        with Timer(" Thread elapsed time (OpenSlide)") as timer:
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=num_workers
            ) as executor:
                # Drain the lazy result iterator so that an exception raised
                # in any worker is re-raised here instead of being dropped,
                # which would make failed reads look like fast reads.
                for _ in executor.map(
                    lambda start_loc: load_tile_openslide(slide, start_loc, patch_size),
                    start_loc_iter,
                ):
                    pass
            # Stop the clock after the pool has shut down (all reads done).
            openslide_time = timer.elapsed_time()
            openslide_tot_time += openslide_time

    cucim_time = 0
    slide = CuImage(input_file)
    # A generator is single-use, so build a fresh one for the cuCIM pass.
    start_loc_iter = ((sx, sy)
                      for sy in range(start_location, height, patch_size)
                      for sx in range(start_location, width, patch_size))
    with Timer(" Thread elapsed time (cuCIM)") as timer:
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=num_workers
        ) as executor:
            # Same draining as above: surface worker exceptions.
            for _ in executor.map(
                lambda start_loc: load_tile_cucim(slide, start_loc, patch_size),
                start_loc_iter,
            ):
                pass
        cucim_time = timer.elapsed_time()
        cucim_tot_time += cucim_time
    print(" Performance gain (OpenSlide/cuCIM): {}".format(openslide_time / cucim_time))
print("Total time (OpenSlide):", openslide_tot_time)
print("Total time (cuCIM):", cucim_tot_time)
print("Average performance gain (OpenSlide/cuCIM): {}".format(openslide_tot_time / cucim_tot_time))
# of thread : 1
Thread elapsed time (OpenSlide): 203.12034743092954
Thread elapsed time (cuCIM): 35.13261566311121
# of thread : 2
Thread elapsed time (OpenSlide): 102.09872915921733
Thread elapsed time (cuCIM): 19.746847699861974
# of thread : 3
Thread elapsed time (OpenSlide): 69.23487223219126
Thread elapsed time (cuCIM): 14.231686793733388
# of thread : 4
Thread elapsed time (OpenSlide): 53.13889923784882
Thread elapsed time (cuCIM): 11.085060752928257
# of thread : 5
Thread elapsed time (OpenSlide): 44.01897697104141
Thread elapsed time (cuCIM): 9.731189775746316
# of thread : 6
Thread elapsed time (OpenSlide): 39.78462764201686
Thread elapsed time (cuCIM): 9.279538444709033
# of thread : 7
Thread elapsed time (OpenSlide): 39.40460350224748
Thread elapsed time (cuCIM): 8.312216511927545
# of thread : 8
Thread elapsed time (OpenSlide): 38.2298303861171
Thread elapsed time (cuCIM): 8.083018650766462
# of thread : 9
Thread elapsed time (OpenSlide): 36.2004044582136
Thread elapsed time (cuCIM): 7.664179248735309
# of thread : 10
Thread elapsed time (OpenSlide): 35.32523050904274
Thread elapsed time (cuCIM): 8.259015129879117
# of thread : 11
Thread elapsed time (OpenSlide): 34.73069435125217
Thread elapsed time (cuCIM): 7.8271108330227435
# of thread : 12
Thread elapsed time (OpenSlide): 35.79060472594574
Thread elapsed time (cuCIM): 8.684423762373626
# of thread : 1
Thread elapsed time (OpenSlide): 246.3082786342129
Thread elapsed time (cuCIM): 125.12755820900202
# of thread : 2
Thread elapsed time (OpenSlide): 123.19027538970113
Thread elapsed time (cuCIM): 68.67328959237784
# of thread : 3
Thread elapsed time (OpenSlide): 83.65639087790623
Thread elapsed time (cuCIM): 46.031415150966495
# of thread : 4
Thread elapsed time (OpenSlide): 63.73335528932512
Thread elapsed time (cuCIM): 35.13549166591838
# of thread : 5
Thread elapsed time (OpenSlide): 52.45986012322828
Thread elapsed time (cuCIM): 28.303977627772838
# of thread : 6
Thread elapsed time (OpenSlide): 46.916810180060565
Thread elapsed time (cuCIM): 25.7577864988707
# of thread : 7
Thread elapsed time (OpenSlide): 45.930785153992474
Thread elapsed time (cuCIM): 24.895688469987363
# of thread : 8
Thread elapsed time (OpenSlide): 45.12975976616144
Thread elapsed time (cuCIM): 22.422960069030523
# of thread : 9
Thread elapsed time (OpenSlide): 43.284258441999555
Thread elapsed time (cuCIM): 22.672365427017212
# of thread : 10
Thread elapsed time (OpenSlide): 41.37739813886583
Thread elapsed time (cuCIM): 20.014441611245275
# of thread : 11
Thread elapsed time (OpenSlide): 40.737238076049834
Thread elapsed time (cuCIM): 19.632989757228643
# of thread : 12
Thread elapsed time (OpenSlide): 40.8493790011853
Thread elapsed time (cuCIM): 19.66802476812154
For each patch, the worker process opens the image file.
import concurrent.futures
from itertools import repeat
import numpy as np
from openslide import OpenSlide
from cucim import CuImage
import os
# Largest process-pool size to benchmark. os.cpu_count() may return None when
# the count cannot be determined, so fall back to a single worker.
num_processes = os.cpu_count() or 1
# Image file read by both libraries.
input_file = "input/image2.tif"
# First x and y coordinate of the patch grid.
start_location = 0
# Edge length of each square patch; also the stride of the patch grid.
patch_size = 256
def load_tile_openslide_mp(inp_file, start_loc, patch_size):
    """Open ``inp_file`` with OpenSlide, read one square patch, and close it.

    The file is opened anew on every call. The region that was read is
    discarded and the function returns ``None``.
    """
    patch_dims = [patch_size, patch_size]
    with OpenSlide(inp_file) as handle:
        handle.read_region(start_loc, 0, patch_dims)
def load_tile_cucim_mp(inp_file, start_loc, patch_size):
    """Open ``inp_file`` with cuCIM and read one square patch from it.

    A new ``CuImage`` is created on every call. The region that was read is
    discarded and the function returns ``None``.
    """
    patch_dims = [patch_size, patch_size]
    handle = CuImage(inp_file)
    handle.read_region(start_loc, patch_dims, 0)
# Multi-process benchmark, variant 1: every patch request is its own job and
# each job opens the image file again inside the worker process.
openslide_tot_time = 0
cucim_tot_time = 0
for num_workers in range(1, num_processes + 1):
    print("# of processes : {}".format(num_workers))
    openslide_time = 0
    # (92344 x 81017)
    # The parent only needs the dimensions; the workers open the file
    # themselves, so close this handle before any pool is created.
    with OpenSlide(input_file) as slide:
        width, height = slide.dimensions

    # Start locations are (x, y) pairs, matching the other benchmark cells
    # (the coordinates used to be emitted swapped, as (sy, sx)).
    start_loc_iter = ((sx, sy)
                      for sy in range(start_location, height, patch_size)
                      for sx in range(start_location, width, patch_size))
    with Timer(" Process elapsed time (OpenSlide)") as timer:
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=num_workers
        ) as executor:
            # Drain the lazy result iterator so that an exception raised in
            # any worker is re-raised here instead of being dropped.
            for _ in executor.map(
                load_tile_openslide_mp,
                repeat(input_file),
                start_loc_iter,
                repeat(patch_size)
            ):
                pass
        # Stop the clock after the pool has shut down (all reads done).
        openslide_time = timer.elapsed_time()
        openslide_tot_time += openslide_time

    cucim_time = 0
    # A generator is single-use, so build a fresh one for the cuCIM pass.
    start_loc_iter = ((sx, sy)
                      for sy in range(start_location, height, patch_size)
                      for sx in range(start_location, width, patch_size))
    with Timer(" Process elapsed time (cuCIM)") as timer:
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=num_workers
        ) as executor:
            for _ in executor.map(
                load_tile_cucim_mp,
                repeat(input_file),
                start_loc_iter,
                repeat(patch_size)
            ):
                pass
        cucim_time = timer.elapsed_time()
        cucim_tot_time += cucim_time
    print(" Performance gain (OpenSlide/cuCIM): {}".format(openslide_time / cucim_time))
print("Total time (OpenSlide):", openslide_tot_time)
print("Total time (cuCIM):", cucim_tot_time)
print("Average performance gain (OpenSlide/cuCIM): {}".format(openslide_tot_time / cucim_tot_time))
# of processes : 1
Process elapsed time (OpenSlide): 441.52679147804156
Process elapsed time (cuCIM): 208.89401917299256
# of processes : 2
Process elapsed time (OpenSlide): 235.22407114738598
Process elapsed time (cuCIM): 115.76672784099355
# of processes : 3
Process elapsed time (OpenSlide): 169.28083365410566
Process elapsed time (cuCIM): 91.57951975474134
# of processes : 4
Process elapsed time (OpenSlide): 138.3362634689547
Process elapsed time (cuCIM): 78.2894302061759
# of processes : 5
Process elapsed time (OpenSlide): 121.89170560985804
Process elapsed time (cuCIM): 74.86900206608698
# of processes : 6
Process elapsed time (OpenSlide): 110.64038014411926
Process elapsed time (cuCIM): 71.43692379305139
# of processes : 7
Process elapsed time (OpenSlide): 101.48756717005745
Process elapsed time (cuCIM): 74.7042864956893
# of processes : 8
Process elapsed time (OpenSlide): 96.16556345298886
Process elapsed time (cuCIM): 71.8208787702024
# of processes : 9
Process elapsed time (OpenSlide): 92.71181897399947
Process elapsed time (cuCIM): 72.84391884505749
# of processes : 10
Process elapsed time (OpenSlide): 91.19949483824894
Process elapsed time (cuCIM): 78.10580187477171
# of processes : 11
Process elapsed time (OpenSlide): 91.57920746784657
Process elapsed time (cuCIM): 78.9079754636623
# of processes : 12
Process elapsed time (OpenSlide): 90.7518733246252
Process elapsed time (cuCIM): 76.84036188805476
For each process, reuse the opened file but submit a job for each patch request.
import concurrent.futures
from itertools import repeat
from functools import partial
import numpy as np
from openslide import OpenSlide
from cucim import CuImage
import os
# Largest process-pool size to benchmark. os.cpu_count() may return None when
# the count cannot be determined, so fall back to a single worker.
num_processes = os.cpu_count() or 1
# Image file read by both libraries.
input_file = "input/image2.tif"
# First x and y coordinate of the patch grid.
start_location = 0
# Edge length of each square patch; also the stride of the patch grid.
patch_size = 256
# Per-process state used by proc_init_openslide / proc_init_cucim: a flag
# recording that a handle has been opened, and the opened handles themselves.
is_process_initialized = False
openslide_obj = None
cucim_obj = None
def load_tile_openslide_mp(slide, start_loc, patch_size):
    """Read one square patch through an already-open OpenSlide handle.

    Calls ``slide.read_region(start_loc, 0, [patch_size, patch_size])``.
    The returned region is discarded and the function returns ``None``.
    """
    patch_dims = [patch_size, patch_size]
    slide.read_region(start_loc, 0, patch_dims)
def proc_init_openslide(inp_file, f, *iters):
    """Call ``f`` with this process's OpenSlide handle, opening it on first use.

    Args:
        inp_file: Path handed to ``OpenSlide`` the first time this runs in a
            process; later calls reuse the handle and ignore the path.
        f: Callable invoked as ``f(openslide_obj, *iters)``.
        *iters: Remaining positional arguments forwarded to ``f``.

    Returns:
        Whatever ``f`` returns.
    """
    global is_process_initialized, openslide_obj
    # Key the lazy open on the handle itself rather than on the flag shared
    # with proc_init_cucim: if the other initializer ran first in this
    # process, the flag would be set while openslide_obj is still None.
    if openslide_obj is None:
        openslide_obj = OpenSlide(inp_file)
        is_process_initialized = True
    return f(openslide_obj, *iters)
def load_tile_cucim_mp(slide, start_loc, patch_size):
    """Read one square patch through an already-open cuCIM handle.

    Calls ``slide.read_region(start_loc, [patch_size, patch_size], 0)``.
    The returned region is discarded and the function returns ``None``.
    """
    patch_dims = [patch_size, patch_size]
    slide.read_region(start_loc, patch_dims, 0)
def proc_init_cucim(inp_file, f, *iters):
    """Call ``f`` with this process's cuCIM handle, opening it on first use.

    Args:
        inp_file: Path handed to ``CuImage`` the first time this runs in a
            process; later calls reuse the handle and ignore the path.
        f: Callable invoked as ``f(cucim_obj, *iters)``.
        *iters: Remaining positional arguments forwarded to ``f``.

    Returns:
        Whatever ``f`` returns.
    """
    global is_process_initialized, cucim_obj
    # Key the lazy open on the handle itself rather than on the flag shared
    # with proc_init_openslide: if the other initializer ran first in this
    # process, the flag would be set while cucim_obj is still None.
    if cucim_obj is None:
        cucim_obj = CuImage(inp_file)
        is_process_initialized = True
    return f(cucim_obj, *iters)
# Multi-process benchmark, variant 2: every patch request is still its own
# job, but each worker process opens the image once (via proc_init_*) and
# reuses that handle for all jobs it receives.
openslide_tot_time = 0
cucim_tot_time = 0
for num_workers in range(1, num_processes + 1):
    print("# of processes : {}".format(num_workers))
    openslide_time = 0
    # (92344 x 81017)
    # The parent only needs the dimensions; the workers open the file
    # themselves, so close this handle before any pool is created.
    with OpenSlide(input_file) as slide:
        width, height = slide.dimensions

    # Lazily yields the (x, y) start of every patch, row by row.
    start_loc_iter = ((sx, sy)
                      for sy in range(start_location, height, patch_size)
                      for sx in range(start_location, width, patch_size))
    with Timer(" Process elapsed time (OpenSlide)") as timer:
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=num_workers
        ) as executor:
            # Drain the lazy result iterator so that an exception raised in
            # any worker is re-raised here instead of being dropped.
            for _ in executor.map(
                partial(proc_init_openslide, input_file, load_tile_openslide_mp),
                start_loc_iter,
                repeat(patch_size)
            ):
                pass
        # Stop the clock after the pool has shut down (all reads done).
        openslide_time = timer.elapsed_time()
        openslide_tot_time += openslide_time

    cucim_time = 0
    # A generator is single-use, so build a fresh one for the cuCIM pass.
    start_loc_iter = ((sx, sy)
                      for sy in range(start_location, height, patch_size)
                      for sx in range(start_location, width, patch_size))
    with Timer(" Process elapsed time (cuCIM)") as timer:
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=num_workers
        ) as executor:
            for _ in executor.map(
                partial(proc_init_cucim, input_file, load_tile_cucim_mp),
                start_loc_iter,
                repeat(patch_size)
            ):
                pass
        cucim_time = timer.elapsed_time()
        cucim_tot_time += cucim_time
    print(" Performance gain (OpenSlide/cuCIM): {}".format(openslide_time / cucim_time))
print("Total time (OpenSlide):", openslide_tot_time)
print("Total time (cuCIM):", cucim_tot_time)
print("Average performance gain (OpenSlide/cuCIM): {}".format(openslide_tot_time / cucim_tot_time))
# of processes : 1
Process elapsed time (OpenSlide): 208.6686624987051
Process elapsed time (cuCIM): 48.001787026878446
# of processes : 2
Process elapsed time (OpenSlide): 108.32851185882464
Process elapsed time (cuCIM): 27.654730859212577
# of processes : 3
Process elapsed time (OpenSlide): 75.08803005004302
Process elapsed time (cuCIM): 21.817759499885142
# of processes : 4
Process elapsed time (OpenSlide): 59.7227668906562
Process elapsed time (cuCIM): 20.43205594085157
# of processes : 5
Process elapsed time (OpenSlide): 51.258338663727045
Process elapsed time (cuCIM): 20.458562731277198
# of processes : 6
Process elapsed time (OpenSlide): 46.47623342694715
Process elapsed time (cuCIM): 20.85869163228199
# of processes : 7
Process elapsed time (OpenSlide): 46.49370166473091
Process elapsed time (cuCIM): 21.7327726688236
# of processes : 8
Process elapsed time (OpenSlide): 45.238605635240674
Process elapsed time (cuCIM): 22.58527811197564
# of processes : 9
Process elapsed time (OpenSlide): 44.749732580035925
Process elapsed time (cuCIM): 23.556206807959825
# of processes : 10
Process elapsed time (OpenSlide): 44.475309615023434
Process elapsed time (cuCIM): 24.051936954259872
# of processes : 11
Process elapsed time (OpenSlide): 44.4071687720716
Process elapsed time (cuCIM): 25.294292493723333
# of processes : 12
Process elapsed time (OpenSlide): 44.7593243108131
Process elapsed time (cuCIM): 25.84700824506581
Patch requests are divided among the processes, and each process receives a single job containing its list of patch requests.
import concurrent.futures
from itertools import repeat
import numpy as np
from openslide import OpenSlide
from cucim import CuImage
import os
# Largest process-pool size to benchmark. os.cpu_count() may return None when
# the count cannot be determined, so fall back to a single worker.
num_processes = os.cpu_count() or 1
# Image file read by both libraries.
input_file = "input/image2.tif"
# First x and y coordinate of the patch grid.
start_location = 0
# Edge length of each square patch; also the stride of the patch grid.
patch_size = 256
def load_tile_openslide_chunk_mp(inp_file, start_loc_list, patch_size):
    """Open ``inp_file`` once with OpenSlide and read a batch of patches.

    One square patch is read for every start location in
    ``start_loc_list``. The regions are discarded and the function returns
    ``None``.
    """
    patch_dims = [patch_size, patch_size]
    with OpenSlide(inp_file) as handle:
        for location in start_loc_list:
            handle.read_region(location, 0, patch_dims)
def load_tile_cucim_chunk_mp(inp_file, start_loc_list, patch_size):
    """Open ``inp_file`` once with cuCIM and read a batch of patches.

    One square patch is read for every start location in
    ``start_loc_list``. The regions are discarded and the function returns
    ``None``.
    """
    patch_dims = [patch_size, patch_size]
    handle = CuImage(inp_file)
    for location in start_loc_list:
        handle.read_region(location, patch_dims, 0)
# Multi-process benchmark, variant 3: the patch list is split into one
# contiguous chunk per worker, and each worker gets a single job that opens
# the image once and reads its whole chunk.
openslide_tot_time = 0
cucim_tot_time = 0
print("Total # of processes : {}".format(num_processes))
for num_workers in range(1, num_processes + 1):
    print("# of processes : {}".format(num_workers))
    openslide_time = 0
    # (92344 x 81017)
    # The parent only needs the dimensions; the workers open the file
    # themselves, so close this handle before any pool is created.
    with OpenSlide(input_file) as slide:
        width, height = slide.dimensions

    # (x, y) start of every patch, row by row. Both libraries use the same
    # list, so it is built once per pool size.
    start_loc_data = [(sx, sy)
                      for sy in range(start_location, height, patch_size)
                      for sx in range(start_location, width, patch_size)]
    # Ceiling division gives at most num_workers chunks. Floor division left
    # a small extra remainder chunk (an additional job), and produced a zero
    # step, hence ValueError, when workers outnumber patches.
    chunk_size = max(1, -(-len(start_loc_data) // num_workers))
    start_loc_list_iter = [start_loc_data[i:i + chunk_size]
                           for i in range(0, len(start_loc_data), chunk_size)]

    with Timer(" Process elapsed time (OpenSlide)") as timer:
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=num_workers
        ) as executor:
            # Drain the lazy result iterator so that an exception raised in
            # any worker is re-raised here instead of being dropped.
            for _ in executor.map(
                load_tile_openslide_chunk_mp,
                repeat(input_file),
                start_loc_list_iter,
                repeat(patch_size)
            ):
                pass
        # Stop the clock after the pool has shut down (all reads done).
        openslide_time = timer.elapsed_time()
        openslide_tot_time += openslide_time

    cucim_time = 0
    with Timer(" Process elapsed time (cuCIM)") as timer:
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=num_workers
        ) as executor:
            for _ in executor.map(
                load_tile_cucim_chunk_mp,
                repeat(input_file),
                start_loc_list_iter,
                repeat(patch_size)
            ):
                pass
        cucim_time = timer.elapsed_time()
        cucim_tot_time += cucim_time
    print(" Performance gain (OpenSlide/cuCIM): {}".format(openslide_time / cucim_time))
print("Total time (OpenSlide):", openslide_tot_time)
print("Total time (cuCIM):", cucim_tot_time)
print("Average performance gain (OpenSlide/cuCIM): {}".format(openslide_tot_time / cucim_tot_time))
# of processes : 1
Process elapsed time (OpenSlide): 198.9614152610302
Process elapsed time (cuCIM): 34.512199216056615
# of processes : 2
Process elapsed time (OpenSlide): 101.16406151233241
Process elapsed time (cuCIM): 18.7446903497912
# of processes : 3
Process elapsed time (OpenSlide): 68.10482547199354
Process elapsed time (cuCIM): 12.816827611997724
# of processes : 4
Process elapsed time (OpenSlide): 51.85946137504652
Process elapsed time (cuCIM): 9.313994630239904
# of processes : 5
Process elapsed time (OpenSlide): 41.984213249292225
Process elapsed time (cuCIM): 7.512824849225581
# of processes : 6
Process elapsed time (OpenSlide): 37.449110239744186
Process elapsed time (cuCIM): 6.9438614239916205
# of processes : 7
Process elapsed time (OpenSlide): 37.975524694658816
Process elapsed time (cuCIM): 6.320528977084905
# of processes : 8
Process elapsed time (OpenSlide): 36.37545741070062
Process elapsed time (cuCIM): 6.549180408939719
# of processes : 9
Process elapsed time (OpenSlide): 36.17362955166027
Process elapsed time (cuCIM): 5.6686060433276
# of processes : 10
Process elapsed time (OpenSlide): 34.56402690522373
Process elapsed time (cuCIM): 5.5428653210401535
# of processes : 11
Process elapsed time (OpenSlide): 33.02037419890985
Process elapsed time (cuCIM): 5.224415393080562
# of processes : 12
Process elapsed time (OpenSlide): 32.9791039316915
Process elapsed time (cuCIM): 5.0348134520463645
# of processes : 1
Process elapsed time (OpenSlide): 240.61588192591444
Process elapsed time (cuCIM): 131.02941245539114
# of processes : 2
Process elapsed time (OpenSlide): 123.80615371605381
Process elapsed time (cuCIM): 71.65121614700183
# of processes : 3
Process elapsed time (OpenSlide): 83.54661530908197
Process elapsed time (cuCIM): 47.34036159096286
# of processes : 4
Process elapsed time (OpenSlide): 63.7056167148985
Process elapsed time (cuCIM): 37.40374026214704
# of processes : 5
Process elapsed time (OpenSlide): 51.50155539019033
Process elapsed time (cuCIM): 27.897105684969574
# of processes : 6
Process elapsed time (OpenSlide): 44.712373277172446
Process elapsed time (cuCIM): 25.32637894200161
# of processes : 7
Process elapsed time (OpenSlide): 44.199173680040985
Process elapsed time (cuCIM): 19.60028947889805
# of processes : 8
Process elapsed time (OpenSlide): 44.04563747579232
Process elapsed time (cuCIM): 20.579743378795683
# of processes : 9
Process elapsed time (OpenSlide): 41.323462426662445
Process elapsed time (cuCIM): 17.126207023859024
# of processes : 10
Process elapsed time (OpenSlide): 40.54832462500781
Process elapsed time (cuCIM): 12.304737649857998
# of processes : 11
Process elapsed time (OpenSlide): 39.315781021956354
Process elapsed time (cuCIM): 16.732423092238605
# of processes : 12
Process elapsed time (OpenSlide): 38.80393008608371
Process elapsed time (cuCIM): 14.9841771251522