#!/usr/bin/env python
# coding: utf-8

# # Axis transposition benchmarking
# This notebook compares the performance of different implementations of transposing axes.
#
# **Note:** Benchmarking results vary heavily depending on image size, kernel size, used operations, parameters and used hardware. Adapt this notebook to your use-case scenario and benchmark on your target hardware. If you have different scenarios or use-cases, you are very welcome to submit your notebook as a pull request!

# In[1]:


import pyclesperanto_prototype as cle
import numpy as np
import time
import cupy as cp

# To measure kernel execution duration properly, we need to set this flag.
# It will slow down execution of workflows a bit though.
cle.set_wait_for_kernel_finish(True)

# Select a GPU with the following string in its name. This will fall back to
# any other GPU if none with this name is found.
cle.select_device('RTX')


# In[2]:


# test data
test_image = np.random.random([100, 512, 1024])


# ## clEsperanto

# In[3]:


# transpose with pyclesperanto
result_image = None

test_image_gpu = cle.push_zyx(test_image)

for i in range(0, 10):
    start_time = time.time()
    result_image = cle.transpose_xz(test_image_gpu, result_image)
    print("pyclesperanto transpose duration: " + str(time.time() - start_time))

print(result_image.shape)


# ## cupy

# In[4]:


# transpose with cupy
result_image = None

cu_test_image = cp.asarray(test_image)

for i in range(0, 10):
    start_time = time.time()
    result_image = cp.transpose(cu_test_image, (2, 1, 0))
    # we need to wait here to measure time properly
    cp.cuda.stream.get_current_stream().synchronize()
    print("cupy transpose duration: " + str(time.time() - start_time))

print(result_image.shape)


# ## numpy

# In[5]:


# transpose with numpy
# note: np.transpose returns a view without copying the data, so its timings
# are not directly comparable to implementations that materialize the result
result_image = None

for i in range(0, 10):
    start_time = time.time()
    result_image = np.transpose(test_image, (2, 1, 0))
    print("numpy transpose duration: " + str(time.time() - start_time))

print(result_image.shape)
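
# ## Sanity check
# The cell below is a minimal sketch (not part of the original benchmark) that
# verifies the three implementations produce the same transposed array. It
# assumes the cells above were run so that `test_image` is in scope, and that
# `cle.transpose_xz` on a zyx-ordered stack corresponds to
# `np.transpose(..., (2, 1, 0))`.

# In[ ]:


# pull the pyclesperanto result back from the GPU as a numpy array
cle_result = cle.pull_zyx(cle.transpose_xz(cle.push_zyx(test_image)))

# bring the cupy result back to the host
cupy_result = cp.asnumpy(cp.transpose(cp.asarray(test_image), (2, 1, 0)))

numpy_result = np.transpose(test_image, (2, 1, 0))

# all three should agree element-wise (up to float32 precision on the GPU)
print(np.allclose(cle_result, numpy_result))
print(np.allclose(cupy_result, numpy_result))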