This notebook generates the preview images for each tomographic scan. It is written so that it can be run at essentially any stage of acquiring the tomographic data and helps to find operator errors in either scanning or reconstructing the scans. Previews for newly added data (or for reconstructions that were deleted since an older run of the notebook) are simply regenerated.
The cells below are used to set up the whole notebook. They load needed libraries and set some default values.
# Load the modules we need
import platform
import os
import glob
import pandas
import imageio
import numpy
import matplotlib.pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar
import seaborn
import dask
import dask_image.imread
from dask.distributed import Client, LocalCluster
import skimage
from tqdm.auto import tqdm, trange
# Load our own log file parsing code
# This is loaded as a submodule to alleviate excessive copy-pasting between *all* projects we do
# See https://github.com/habi/BrukerSkyScanLogfileRuminator for details on its inner workings
from BrukerSkyScanLogfileRuminator.parsing_functions import *
# Set dask temporary folder
# Do this before creating a client: https://stackoverflow.com/a/62804525/323100
# We use the fast internal SSD for speed reasons
import tempfile
if 'Linux' in platform.system():
# Check if we mounted the FastSSD, otherwise go to the standard tmp folder
if os.path.exists(os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')):
tmp = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD', 'tmp')
else:
tmp = tempfile.gettempdir()
elif 'Darwin' in platform.system():
tmp = tempfile.gettempdir()
else:
if 'anaklin' in platform.node():
tmp = os.path.join('F:\\tmp')
else:
tmp = os.path.join('D:\\tmp')
dask.config.set({'temporary_directory': tmp})
print('Dask temporary files go to %s' % dask.config.get('temporary_directory'))
from dask.distributed import Client
client = Client()
client
print('You can see what DASK is doing at "http://localhost:%s/status"' % client.scheduler_info()['services']['dashboard'])
# Set up figure defaults
plt.rc('image', cmap='gray', interpolation='nearest') # Display all images in b&w and with 'nearest' interpolation
plt.rcParams['figure.figsize'] = (16, 9) # Size up figures a bit
plt.rcParams['figure.dpi'] = 200
# Setup scale bar defaults
plt.rcParams['scalebar.location'] = 'lower right'
plt.rcParams['scalebar.frameon'] = False
plt.rcParams['scalebar.color'] = 'white'
# Display all plots identically
lines = 3
# And then do something like
# plt.subplot(lines, int(numpy.ceil(len(Data) / float(lines))), c + 1)
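# e.g. with lines = 3 and 20 datasets to show, plt.subplot(3, int(numpy.ceil(20 / 3)), ...) lays the previews out on a 3 x 7 grid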
Since the (tomographic) data can reside on different drives, we set the folder to use below.
# Different locations if running either on Linux or Windows
FastSSD = False
nanoct = False # Load the data directly from the 2214
overthere = True # Load the data directly from the iee-research_storage drive
# to speed things up significantly
if 'Linux' in platform.system():
if FastSSD:
BasePath = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')
elif overthere:
BasePath = os.path.join(os.sep, 'home', 'habi', 'research-storage-iee', 'microCT')
elif nanoct:
BasePath = os.path.join(os.path.sep, 'home', 'habi', '2214')
else:
BasePath = os.path.join(os.sep, 'home', 'habi', '1272')
elif 'Darwin' in platform.system():
FastSSD = False
BasePath = os.path.join('/Users/habi/Dev/EAWAG/Data')
elif 'Windows' in platform.system():
if FastSSD:
BasePath = os.path.join('F:\\')
else:
if overthere:
BasePath = os.path.join('\\\\resstore.unibe.ch', 'iee_aqua', 'microCTupload')
elif nanoct:
BasePath = os.path.join('N:\\')
else:
BasePath = os.path.join('D:\\Results')
if overthere:
Root = BasePath
else:
Root = os.path.join(BasePath, 'EAWAG')
print('We are loading all the data from %s' % Root)
Now that we are set up, we actually start to load/ingest the data.
# Make us a dataframe for saving all that we need
Data = pandas.DataFrame()
# Get *all* log files
# Using os.walk is way faster than using recursive glob.glob, see DataWrangling.ipynb for details
# Not sorting the found logfiles is also making it quicker
Data['LogFile'] = [os.path.join(root, name)
for root, dirs, files in os.walk(Root)
for name in files
if name.endswith(".log")]
# Get all folders
Data['Folder'] = [os.path.dirname(f) for f in Data['LogFile']]
# Show a (small) sample of the loaded data as a first check
Data.sample(n=3)
# Check for samples which are not yet reconstructed
for c, row in Data.iterrows():
# Iterate over every 'proj' folder
if 'proj' in row.Folder:
if 'TScopy' not in row.Folder and 'PR' not in row.Folder:
# If there's nothing with 'rec*' on the same level, then tell us
if not glob.glob(row.Folder.replace('proj', '*rec*')):
print('- %s is missing matching reconstructions' % row.LogFile[len(Root) + 1:])
# 103375/proj_stuck/*.log is a scan where we lost air pressure in the building and the rotation stage got stuck. We renamed the folder afterwards.
# 103761/proj_oj/103761.log is a scan where the sample holder touched the source
# Search for any .csv files in each folder.
# These are only generated when the "X/Y Alignment With a Reference Scan" was performed in NRecon.
# If those files do *not* exist, we forgot to do the alignment and should correct this.
Data['XYAlignment'] = [glob.glob(os.path.join(f, '*T*.csv')) for f in Data['Folder']]
# Display samples which are missing the .csv-files for the XY-alignment
for c, row in Data.iterrows():
# Iterate over every 'proj' folder
if 'proj' in row['Folder']:
if not row['XYAlignment']:
if not any(x in row.LogFile for x in ['rectmp.log', # because we only exclude temporary logfiles in a later step
'proj_nofilter', # since these two scans of single teeth don't contain a reference scan
'TScopy', # discard *t*hermal *s*hift data
os.path.join('TJ3', 'jaw_v1'), # no reference scan
os.path.join('28', 'full_188um'), # no reference scan
os.path.join('75', 'proj_stuck'), # 103375\proj_stuck which got stuck
os.path.join('106985', 'proj'), # 106985/proj is a b0rked scan where something went wrong with the exposure time
os.path.join('161543', 'head_30'), # 161543\head_30um has no reference scan
os.path.join('21322', 'jaw'), # two scans of 21322 which have no reference scan
os.path.join('21322', 'whole'), # two scans of 21322 which have no reference scan
os.path.join('31', 'moved_proj'), # MA31\moved_proj which moved during acquisition
os.path.join('95', 'proj', '14295'), # no reference scan
os.path.join('104061', 'head', 'proj_pressure')]): # We lost air pressure (in the building) during this scan
print('- %s has *not* been X/Y aligned' % row.LogFile[len(Root) + 1:])
# Get rid of all the logfiles from all the folders that might be on disk but that we don't want to load the data from
for c, row in Data.iterrows():
if 'rec' not in row.Folder:
Data.drop([c], inplace=True)
elif 'SubScan' in row.Folder:
Data.drop([c], inplace=True)
elif 'rectmp.log' in row.LogFile:
Data.drop([c], inplace=True)
# Reset the dataframe to what we would have gotten if we had only loaded the 'rec' files
Data = Data.reset_index(drop=True)
# Generate us some meaningful columns in the dataframe
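# The first folder level below `Root` is the fish ID; the remaining folder levels
# between the fish folder and the log file name (joined with dots) identify the scan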
Data['Fish'] = [l[len(Root) + 1:].split(os.sep)[0] for l in Data['LogFile']]
Data['Scan'] = ['.'.join(l[len(Root) + 1:].split(os.sep)[1:-1]) for l in Data['LogFile']]
# Quickly show the data from the last loaded scans
Data.tail()
# Load the file names of all the reconstructions of all the scans
Data['Filenames Reconstructions'] = [glob.glob(os.path.join(f, '*rec0*.png')) for f in Data['Folder']]
# How many reconstructions do we have?
Data['Number of reconstructions'] = [len(r) for r in Data['Filenames Reconstructions']]
# Drop samples which either have not been reconstructed yet or whose reconstructions we deleted with
# `find . -name "*rec*.png" -type f -mtime +333 -delete`
# Based on https://stackoverflow.com/a/13851602
# for c,row in Data.iterrows():
# if not row['Number of reconstructions']:
# print('%s contains no PNG files, we might be currently reconstructing it' % row.Folder)
Data = Data[Data['Number of reconstructions'] > 0]
# Reset the dataframe count/index for easier indexing afterwards
Data.reset_index(drop=True, inplace=True)
print('We have %s folders with reconstructions' % (len(Data)))
# Get parameters to double-check from the log files
Data['Voxelsize'] = [pixelsize(log) for log in Data['LogFile']]
Data['Filter'] = [whichfilter(log) for log in Data['LogFile']]
Data['Exposuretime'] = [exposuretime(log) for log in Data['LogFile']]
Data['Scanner'] = [scanner(log) for log in Data['LogFile']]
Data['Averaging'] = [averaging(log) for log in Data['LogFile']]
Data['ProjectionSize'] = [projection_size(log) for log in Data['LogFile']]
Data['RotationStep'] = [rotationstep(log) for log in Data['LogFile']]
Data['Grayvalue'] = [reconstruction_grayvalue(log) for log in Data['LogFile']]
Data['RingartefactCorrection'] = [ringremoval(log) for log in Data['LogFile']]
Data['BeamHardeningCorrection'] = [beamhardening(log) for log in Data['LogFile']]
Data['DefectPixelMasking'] = [defectpixelmasking(log) for log in Data['LogFile']]
Data['Scan date'] = [scandate(log) for log in Data['LogFile']]
# The iee research storage folder contains some folders with scans done by Kassandra on a SkyScan1273.
# Exclude those since they are not part of this study; we just looked at them to help her.
for c, row in Data.iterrows():
if '1273' in row.Scanner:
print('Dropping %s from our dataframe' % row.LogFile[len(Root)+1:])
Data.drop([c], inplace=True)
# Reset dataframe index
Data = Data.reset_index(drop=True)
Display the parameters we extracted from the log files (with our log file parser) to check for consistency.
# Check ring removal parameters
for machine in Data['Scanner'].unique():
print('For the %s we have '
'ringartefact-correction values of %s' % (machine,
Data[Data.Scanner == machine]['RingartefactCorrection'].unique()))
# Display ring removal parameter
for rac in sorted(Data['RingartefactCorrection'].unique()):
print('Ringartefact-correction %02s is found in %03s scans' % (rac,
Data[Data.RingartefactCorrection == rac]['RingartefactCorrection'].count()))
# Display ring removal parameter for non-zero values
for scanner in Data.Scanner.unique():
print('----', scanner, '----')
for c, row in Data[Data.Scanner == scanner].iterrows():
if row.RingartefactCorrection != 0:
print('Fish %s scan %s was reconstructed with RAC of %s' % (row.Fish,
row.Scan,
row.RingartefactCorrection))
# Check beamhardening parameters
for scanner in Data.Scanner.unique():
print('For the %s we have '
'beamhardening correction values of %s' % (scanner,
Data[Data.Scanner == scanner]['BeamHardeningCorrection'].unique()))
# Display beamhardening parameters
for scanner in Data.Scanner.unique():
print('----', scanner, '----')
for c, row in Data[Data.Scanner == scanner].iterrows():
if row.BeamHardeningCorrection != 0:
print('Scan %s of fish %s was reconstructed with beam hardening correction of %s' % (row.Scan,
row.Fish,
row.BeamHardeningCorrection))
# Check defect pixel masking parameters
for scanner in Data.Scanner.unique():
print('For the %s we have '
'defect pixel masking values of %s' % (scanner,
Data[Data.Scanner == scanner]['DefectPixelMasking'].unique()))
# Display defect pixel masking parameters
for dpm in sorted(Data['DefectPixelMasking'].unique()):
print('A defect pixel masking of %02s is found in %03s scans' % (dpm,
Data[Data.DefectPixelMasking == dpm]['DefectPixelMasking'].count()))
# seaborn.scatterplot(data=Data, x='Fish', y='DefectPixelMasking', hue='Scanner')
# plt.title('Defect pixel masking')
# plt.show()
# Display defect pixel masking parameters
for scanner in Data.Scanner.unique():
print('----', scanner, '----')
for c, row in Data[Data.Scanner == scanner].iterrows():
if row.Scanner == 'SkyScan1272' and row.DefectPixelMasking != 50:
print('Fish %s scan %s was reconstructed with DPM of %s' % (row.Fish,
row.Scan,
row.DefectPixelMasking))
if row.Scanner == 'SkyScan2214' and row.DefectPixelMasking != 0:
print('Fish %s scan %s was reconstructed with DPM of %s' % (row.Fish,
row.Scan,
row.DefectPixelMasking))
Now that we've double-checked some of the parameters (and corrected any issues that might have shown up) we start to load the preview images. If the three cells below are uncommented, the machine-generated previews are shown, otherwise we just continue.
# Data['Filename PreviewImage'] = [sorted(glob.glob(os.path.join(f, '*_spr.bmp')))[0] for f in Data['Folder']]
# Data['PreviewImage'] = [dask_image.imread.imread(pip).squeeze()
# if pip
# else numpy.random.random((100, 100)) for pip in Data['Filename PreviewImage']]
# Make an approximately square overview image
# lines = 10
# for c, row in Data.iterrows():
# plt.subplot(lines, int(numpy.ceil(len(Data) / float(lines))), c + 1)
# plt.imshow(row.PreviewImage.squeeze())
# plt.title(os.path.join(row['Fish'], row['Scan']))
# plt.gca().add_artist(ScaleBar(row['Voxelsize'],
# 'um',
# color='black',
# frameon=True))
# plt.axis('off')
# plt.tight_layout()
# plt.savefig(os.path.join(Root, 'ScanOverviews.png'),
# bbox_inches='tight')
# plt.show()
Now we 'load' all reconstructions from disk into stacks. They are not really loaded into memory all at once, but accessed 'on demand' from the hard disks with dask-image. Otherwise we could not deal with the roughly 1.4 TB of reconstructions that we generated for this project.
Then we directly calculate the central view and the maximum intensity projection along each of the anatomical directions.
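As a minimal, commented-out sketch of this on-demand behaviour (the path below is hypothetical): dask-image only builds a lazy task graph when a stack is 'loaded', and the image files are only read from disk once a concrete result, such as a single slice, is requested.
# stack = dask_image.imread.imread(os.path.join('hypothetical', 'rec', '*rec*.png'))  # lazy, one chunk per PNG
# print(stack.shape, stack.dtype)  # metadata only, the full stack is never held in memory
# central = stack[stack.shape[0] // 2].compute()  # reads only the file(s) needed for this single slice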
# # Load all reconstructions into DASK arrays
# Reconstructions = [dask_image.imread.imread(os.path.join(folder, '*rec*.png')) for folder in Data['Folder']]
# Load all reconstructions into ephemeral DASK arrays, with a nice progress bar...
Reconstructions = [None] * len(Data)
for c, row in tqdm(Data.iterrows(),
desc='Loading reconstructions',
total=len(Data)):
Reconstructions[c] = dask_image.imread.imread(os.path.join(row['Folder'], '*rec*.png'))
# What do we have on disk?
print('We have %s reconstructions on %s' % (Data['Number of reconstructions'].sum(), Root))
print('This is about %s reconstructions per scan (%s scans in %s folders)' % (round(Data['Number of reconstructions'].sum() / len(Data)),
len(Data),
len(Data.Fish.unique())))
# How big are the datasets?
Data['Size'] = [rec.shape for rec in Reconstructions]
# The three cardinal directions
# Names adapted to fishes: https://en.wikipedia.org/wiki/Fish_anatomy#Body
directions = ['Anteroposterior',
'Lateral',
'Dorsoventral']
# # # Get rid of certain samples
# # # This is nice to have if we're running into troubles with certain samples/scans and have to re-reconstruct them
# # # In this case we don't have to re-read in *all* the log files and reconstructions...
# for c, row in Data.iterrows():
# if '103761' in row.Fish:
# Data.drop([c], inplace=True)
# # Reset dataframe to something that we would get if we only would have loaded the 'rec' files
# Data = Data.reset_index(drop=True)
# Read or calculate the middle slices, put them into the dataframe and save them to disk
for d, direction in enumerate(directions):
Data['Mid_' + direction] = ''
for c, row in tqdm(Data.iterrows(), desc='Working on central images', total=len(Data)):
for d, direction in tqdm(enumerate(directions),
desc='%s/%s' % (row['Fish'], row['Scan']),
leave=False,
total=len(directions)):
outfilepath = os.path.join(os.path.dirname(row['Folder']),
'%s.%s.Center.%s.png' % (row['Fish'], row['Scan'], direction))
if os.path.exists(outfilepath):
try:
Data.at[c, 'Mid_' + direction] = dask_image.imread.imread(outfilepath)
except Exception:
print('Something went wrong with the %s image of %s/%s' % (direction, row['Fish'], row['Scan']))
print('Try to delete "%s" and restart the notebook' % outfilepath)
else:
# Generate the requested central view for this direction
if 'Anteroposterior' in direction:
Data.at[c, 'Mid_' + direction] = Reconstructions[c][row.Size[0] // 2].compute()
if 'Lateral' in direction:
Data.at[c, 'Mid_' + direction] = Reconstructions[c][:, row.Size[1] // 2, :].compute()
if 'Dorsoventral' in direction:
Data.at[c, 'Mid_' + direction] = Reconstructions[c][:, :, row.Size[2] // 2].compute()
# Save the calculated 'direction' view to disk
imageio.imwrite(outfilepath, (Data.at[c, 'Mid_' + direction]))
# Show/save middle slices
for c, row in tqdm(Data.iterrows(),
desc='Saving overview of central images',
total=len(Data)):
outfilepath = os.path.join(os.path.dirname(row['Folder']),
'%s.%s.CentralSlices.png' % (row['Fish'], row['Scan']))
if not os.path.exists(outfilepath):
for d, direction in tqdm(enumerate(directions),
desc='%s/%s' % (row['Fish'], row['Scan']),
leave=False,
total=len(directions)):
plt.subplot(1, 3, d + 1)
plt.imshow(row['Mid_' + direction].squeeze())
if d == 0:
plt.axhline(row.Size[1] // 2, c=seaborn.color_palette()[0])
plt.axvline(row.Size[2] // 2, c=seaborn.color_palette()[1])
plt.gca().add_artist(ScaleBar(row['Voxelsize'],
'um',
color=seaborn.color_palette()[2]))
elif d == 1:
plt.axhline(row.Size[0] // 2, c=seaborn.color_palette()[2])
plt.axvline(row.Size[2] // 2, c=seaborn.color_palette()[1])
plt.gca().add_artist(ScaleBar(row['Voxelsize'],
'um',
color=seaborn.color_palette()[0]))
else:
plt.axhline(row.Size[0] // 2, c=seaborn.color_palette()[2])
plt.axvline(row.Size[1] // 2, c=seaborn.color_palette()[0])
plt.gca().add_artist(ScaleBar(row['Voxelsize'],
'um',
color=seaborn.color_palette()[1]))
plt.title('%s\nCentral %s slice' % (os.path.join(row['Fish'], row['Scan']), direction.lower()))
plt.axis('off')
plt.savefig(outfilepath,
transparent=True,
bbox_inches='tight')
plt.show()
# Read or calculate the directional MIPs, put them into the dataframe and save them to disk
for d, direction in enumerate(directions):
Data['MIP_' + direction] = ''
for c, row in tqdm(Data.iterrows(), desc='Working on MIPs', total=len(Data)):
for d, direction in tqdm(enumerate(directions),
desc='%s/%s' % (row['Fish'], row['Scan']),
leave=False,
total=len(directions)):
outfilepath = os.path.join(os.path.dirname(row['Folder']),
'%s.%s.MIP.%s.png' % (row['Fish'], row['Scan'], direction))
if os.path.exists(outfilepath):
try:
Data.at[c, 'MIP_' + direction] = dask_image.imread.imread(outfilepath).squeeze()
except Exception:
print('Something went wrong with the %s image of %s/%s' % (direction, row['Fish'], row['Scan']))
print('Try to delete "%s" and restart the notebook' % outfilepath)
else:
# Generate MIP
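# Taking the maximum along dask array axis d collapses the direction with the same index in
# `directions` (0: anteroposterior, 1: lateral, 2: dorsoventral), consistent with the central slices above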
Data.at[c, 'MIP_' + direction] = Reconstructions[c].max(axis=d).compute().squeeze()
# Save it out
imageio.imwrite(outfilepath, Data.at[c, 'MIP_' + direction].astype('uint8'))
# Show/save MIP slices
for c, row in tqdm(Data.iterrows(),
desc='Saving overview of MIP images',
total=len(Data)):
outfilepath = os.path.join(os.path.dirname(row['Folder']),
'%s.%s.MIPs.png' % (row['Fish'], row['Scan']))
if not os.path.exists(outfilepath):
for d, direction in tqdm(enumerate(directions),
desc='%s/%s' % (row['Fish'], row['Scan']),
leave=False,
total=len(directions)):
plt.subplot(1, 3, d + 1)
plt.imshow(row['MIP_' + direction])
plt.gca().add_artist(ScaleBar(row['Voxelsize'],
'um'))
plt.title('%s MIP' % direction)
plt.axis('off')
plt.title('%s\n%s MIP' % (os.path.join(row['Fish'], row['Scan']), direction))
plt.savefig(outfilepath,
transparent=True,
bbox_inches='tight')
plt.show()
To further check the data, we look at the gray value histograms of the reconstructions. This helps us to find scans that were not reconstructed well and might either need to be repeated or simply re-reconstructed.
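As a rough, commented-out illustration (the helper name and the 1% cutoff are ad-hoc choices for this sketch, not part of the analysis below): a reconstruction with a badly chosen gray value range clips bright voxels, which piles up counts in the topmost histogram bin; this is essentially what the overexposure check on the MIPs further down quantifies.
# def clipped_histogram(hist, clip_fraction=0.01):
#     '''Flag a 256-bin gray value histogram where more than `clip_fraction` of all counts sit in the topmost bin'''
#     return hist[-1] > clip_fraction * hist.sum()
# # Hypothetical usage, once Data['Histogram'] has been filled below:
# # Data['ClippedHistogram'] = [bool(clipped_histogram(h)) for h in Data['Histogram']]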
# Calculate the histograms of one of the MIPs
# Caveat: dask.array.histogram returns histogram AND bins, making each histogram a 'nested' list of [h, b]
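# The reconstructions are saved as 8-bit PNGs, so 256 bins over the range [0, 256) cover every possible gray value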
Data['Histogram'] = [dask.array.histogram(dask.array.array(mip),
bins=2**8,
range=[0, 2**8]) for mip in Data['MIP_Lateral']]
# Actually compute the data and put only h into the dataframe, so we can use it below.
# Discard the bins
Data['Histogram'] = [h for h, b in Data['Histogram']]
def overexposecheck(item, threshold=250, howmanypercent=0.025, whichone='Lateral', verbose=False):
'''Function to check if more than a certain fraction of the pixels is brighter than a certain value'''
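# `howmanypercent` is given in percent, i.e. the default of 0.025 flags MIPs in which more than
# 0.025% of the pixels are brighter than `threshold`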
if (Data['MIP_%s' % whichone][item] > threshold).sum() > (Data['MIP_%s' % whichone][item].size * howmanypercent / 100):
if verbose:
plt.imshow(Data['MIP_%s' % whichone][item])
plt.imshow(numpy.ma.masked_equal(Data['MIP_%s' % whichone][item] > threshold, False),
cmap='viridis_r',
alpha=.618)
plt.title('%s/%s\n%s of %s Mpixels (>%s%%) are brighter '
'than %s' % (Data['Fish'][item],
Data['Scan'][item],
(Data['MIP_%s' % whichone][item] > threshold).sum().compute(),
round(1e-6 * Data['MIP_%s' % whichone][item].size, 2),
howmanypercent,
threshold))
plt.axis('off')
plt.gca().add_artist(ScaleBar(Data['Voxelsize'][item],
'um'))
plt.show()
return True
else:
return False
# Check if 'too much' of the MIP is overexposed
Data['OverExposed'] = [overexposecheck(c,
whichone='Lateral',
verbose=True) for c, row in Data.iterrows()]
The cell below can be used to show the gray value histograms calculated above. This can help to drill down into problems with over- or under-exposure of the scans or with operator errors during reconstruction.
If you want to show the histograms, simply uncomment the cell and run it.
# # Show *all* the gray value histograms
# for c, row in sorted(Data.iterrows()):
# plt.semilogy(row.Histogram,
# label='%s' % os.path.join(row.Fish, row.Scan))
# plt.xlim([0, 255])
# plt.legend()
# plt.show()
print('At the moment, we have previewed %s scans of %s fish in %s' % (len(Data),
len(Data.Fish.unique()),
Root))