# standard python tools
import numpy as np
from glob import glob
import os
# make sure pyqae is available
import pyqae as pq
import pyspark
from pyqae.images import viz
# lazy image
from pyqae.images.lazy import paths_to_tiled_image, DiskMappedLazyImage, backends
print(backends)
findspark could not be initialized
[<class 'pyqae.images.lazy.LazyImagePillowBackend'>]
base_path = os.path.join("..","test","resources")
little_image_list = glob(os.path.join(base_path,"multilayer_tif","*.tif"))
big_image_list = [little_image_list[-1]]
start_img = DiskMappedLazyImage(big_image_list[0], backends[0])
print(start_img.shape)
(120, 120)
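Since the image is disk-mapped, a single region can be read without pulling the whole frame into memory; here is a minimal sketch, assuming DiskMappedLazyImage supports numpy-style slicing (the 64x64 window is purely illustrative):
# hypothetical tile read (assumes DiskMappedLazyImage implements numpy-style __getitem__)
corner_tile = start_img[0:64, 0:64]
print(corner_tile.shape)  # expected: (64, 64)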
def create_line_break(x_tile_size, x_dim):
    # split [0, x_dim) into consecutive chunks of at most x_tile_size pixels
    return [(x, min(x + x_tile_size, x_dim)) for x in range(0, x_dim, x_tile_size)]

def create_tiles(x_tile_size, x_dim, y_tile_size, y_dim):
    # cartesian product of the x and y chunkings -> list of ((xs, xe), (ys, ye)) tiles
    return [(x_tile, y_tile)
            for x_tile in create_line_break(x_tile_size, x_dim)
            for y_tile in create_line_break(y_tile_size, y_dim)]

def tile_dict(tile_list):
    # flatten the tile tuples into plain records so they can feed a Spark DataFrame
    return [{'x_start': int(xs), 'y_start': int(ys),
             'x_end': int(xe), 'y_end': int(ye)}
            for ((xs, xe), (ys, ye)) in tile_list]
print(create_line_break(256, 1025))
[(0, 256), (256, 512), (512, 768), (768, 1024), (1024, 1025)]
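tile_dict flattens these interval pairs into plain records, the form the Spark step below expects; a quick check on a toy 5x5 image cut into 2-pixel tiles:
# first two records of a 5x5 image tiled in 2-pixel steps
print(tile_dict(create_tiles(2, 5, 2, 5))[0:2])
# [{'x_start': 0, 'y_start': 0, 'x_end': 2, 'y_end': 2},
#  {'x_start': 0, 'y_start': 2, 'x_end': 2, 'y_end': 4}]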
im_size = np.random.randint(1000, 4000, size=2)
for tile_size in np.random.randint(100, 300, size=(3, 2)):
    c_tiles = create_tiles(tile_size[0], im_size[0], tile_size[1], im_size[1])
    # the tile areas must sum to the image area, since every pixel belongs to exactly one tile
    tile_sum = np.sum([(xe - xs) * (ye - ys) for ((xs, xe), (ys, ye)) in c_tiles])
    print(np.random.permutation(c_tiles)[0:2])
    assert tile_sum == im_size[0] * im_size[1], \
        "Total pixel count cannot change, {} != {}".format(im_size[0] * im_size[1], tile_sum)
[[[2990 3196] [1134 1296]] [[1495 1794] [1620 1782]]]
[[[3131 3196] [1815 1980]] [[2626 2727] [1485 1650]]]
[[[2700 2925] [1480 1665]] [[3150 3196] [1295 1480]]]
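The pixel-count assertion would also pass if an overlap and a gap happened to cancel out, so a stricter (if slower) sanity check is to paint every tile onto a counter image and require each pixel to be hit exactly once; a small sketch with illustrative sizes:
# valid tilings touch every pixel of the counter image exactly once
cover = np.zeros((50, 70), dtype=int)
for (xs, xe), (ys, ye) in create_tiles(16, 50, 32, 70):
    cover[xs:xe, ys:ye] += 1
assert (cover == 1).all(), "tiles overlap or leave gaps"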
from pyspark.sql import SQLContext
from pyspark import SparkContext
sc = SparkContext()
sqlContext = SQLContext(sc)
sqlContext.createDataFrame(tile_dict(c_tiles))
/srv/conda/lib/python3.6/site-packages/pyspark/sql/session.py:340: UserWarning: inferring schema from dict is deprecated, please use pyspark.sql.Row instead
DataFrame[x_end: bigint, x_start: bigint, y_end: bigint, y_start: bigint]
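The UserWarning above points at the fix: build the DataFrame from explicit Row objects rather than dicts; a minimal sketch reusing the same tile records:
from pyspark.sql import Row
# explicit Rows carry the field names, so Spark no longer infers the schema from dicts
tile_rows = [Row(**d) for d in tile_dict(c_tiles)]
sqlContext.createDataFrame(tile_rows).printSchema()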
sc.stop()