#!/usr/bin/env python # coding: utf-8 # In[1]: from lbmpy.session import * from lbmpy.fieldaccess import * from lbmpy.advanced_streaming.utility import is_inplace, Timestep from lbmpy.boundaries import NoSlip, UBB # # Demo: Streaming Patterns # # Due to the abstract field access provided by `lbmpy`, it is seamlessly possible to support several different streaming steps for the lattice Boltzmann method. All supported streaming steps can be found in lbmpy.fieldaccess. Essential streaming algorithms are the two `Two-Field-algorithms`, namely the `Pull` and the `Push` step, and four variants of `One-Field-algorithms` (sometimes also refered to as `in-place streaming schemes`). These are called the [AA-Pattern](https://doi.org/10.1109/ICPP.2009.38), the [Esoteric Twist](https://doi.org/10.3390/computation5020019), and its two variants, the [Esoteric Pull and Esoteric Push](https://doi.org/10.3390/computation10060092) scheme. # An essential property of these two categories of streaming schemes is that for `Two-Field-algorithms`, two copies of the PDF vector are needed (because the algorithm is not thread-safe), while the `One-Field-algorithms` only need one PDF vector. Thus they save about 50 % of memory. However, this comes at the cost of slightly higher complexity. The reason is that `One-Field-algorithms` are split into Odd and Even timesteps. Thus two variants of the compute kernels, boundary conditions, etc., need to be implemented. Usually, this states a more significant problem, but due to its high level of abstraction, lbmpy can generate special variants automatically and thus hide most of the complexity completely. # # # Besides the six above mentioned, more `PdfFieldAccessor` classes are available. However, these are either not combinable with large-scale MPI-distributed simulations (e.g., the `PeriodicTwoFieldsAccessor`) or fulfill only particular purposes like `CollideOnlyInplaceAccessor`. # Let us start by taking a visual look at the different variants. # ## Collide In-Place # # This special accessor essentially models the absence of a streaming step. # Using it will lead to just executing the collision step on every cell and replacing local pre- with post-collision values. # In[2]: collide_only = CollideOnlyInplaceAccessor() visualize_pdf_field_accessor(collide_only) # ## Pull # In[3]: pull_pattern = StreamPullTwoFieldsAccessor() visualize_pdf_field_accessor(pull_pattern) # ## Push # In[4]: push_pattern = StreamPushTwoFieldsAccessor() visualize_pdf_field_accessor(push_pattern) # ## Esoteric Twist # # [Esoteric Twist: An Efficient in-Place Streaming Algorithmus for the Lattice Boltzmann Method on Massively Parallel Hardware](https://doi.org/10.3390/computation5020019) # In[5]: eso_odd = EsoTwistOddTimeStepAccessor() visualize_pdf_field_accessor(eso_odd) # In[6]: eso_even = EsoTwistEvenTimeStepAccessor() visualize_pdf_field_accessor(eso_even) # ## Esoteric Pull # # [Esoteric Pull and Esoteric Push: Two Simple In-Place Streaming Schemes for the Lattice Boltzmann Method on GPUs](https://doi.org/10.3390/computation5020019) # In[7]: eso_pull_odd = EsoPullOddTimeStepAccessor() visualize_pdf_field_accessor(eso_pull_odd) # In[8]: eso_pull_even = EsoPullEvenTimeStepAccessor() visualize_pdf_field_accessor(eso_pull_even) # ## Esoteric Push # # [Esoteric Pull and Esoteric Push: Two Simple In-Place Streaming Schemes for the Lattice Boltzmann Method on GPUs](https://doi.org/10.3390/computation5020019) # In[9]: eso_push_odd = EsoPushOddTimeStepAccessor() visualize_pdf_field_accessor(eso_push_odd) # In[10]: eso_push_even = EsoPushEvenTimeStepAccessor() visualize_pdf_field_accessor(eso_push_even) # ## AA Pattern # # [Accelerating Lattice Boltzmann Fluid Flow Simulations Using Graphics Processors](https://doi.org/10.1109/ICPP.2009.38) # In[11]: aa_odd = AAOddTimeStepAccessor() visualize_pdf_field_accessor(aa_odd) # In[12]: aa_even = AAEvenTimeStepAccessor() visualize_pdf_field_accessor(aa_even) # ## Streaming Patterns in Practice # # As a next step, we will apply the different streaming patterns to a Lid-driven-cavity problem. Most of the steps are already shown in previous tutorials. Thus these should be used as a reference for setting up simulations in `lbmpy` in general. The main difference is that the `streaming_pattern` keyword will be passed to all top-level functions. # In[13]: omega = 1.9 stencil = LBStencil(Stencil.D2Q9) domain_size = (100, 100) dim = len(domain_size) streaming_pattern = 'esotwist' # pull push aa esotwist esopull esopush timesteps = get_timesteps(streaming_pattern) # [EVEN, ODD] for in-place patterns, [BOTH] for one-field patterns zeroth_timestep = timesteps[0] current_timestep = zeroth_timestep # at the beginning current timestep is zeroth timestep # In[14]: dh = ps.create_data_handling(domain_size=domain_size) pdfs = dh.add_array('pdfs', values_per_cell=stencil.Q) dh.fill('pdfs', 0.0, ghost_layers=True) if not is_inplace(streaming_pattern): pdfs_tmp = dh.add_array('pdfs_tmp', values_per_cell=stencil.Q) dh.fill('pdfs_tmp', 0.0, ghost_layers=True) velField = dh.add_array('velField', values_per_cell=stencil.D) dh.fill('velField', 0.0, ghost_layers=True) densityField = dh.add_array('densityField', values_per_cell=1) dh.fill('densityField', 1.0, ghost_layers=True) # In[15]: output = {'density': densityField, 'velocity': velField} lbm_config = LBMConfig(stencil=Stencil.D2Q9, method=Method.CUMULANT, relaxation_rate=omega, compressible=True, field_name='pdfs', output=output, streaming_pattern=streaming_pattern) method = create_lb_method(lbm_config=lbm_config) method # In[16]: init = pdf_initialization_assignments(method, densityField, velField, pdfs, streaming_pattern=streaming_pattern, previous_timestep=zeroth_timestep) ast_init = ps.create_kernel(init, target=dh.default_target) kernel_init = ast_init.compile() dh.run_kernel(kernel_init) # In[17]: if is_inplace(streaming_pattern): lbm_optimisation = LBMOptimisation(symbolic_field=pdfs) else: lbm_optimisation = LBMOptimisation(symbolic_field=pdfs, symbolic_temporary_field=pdfs_tmp) collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_optimisation) # In[18]: even_lbm_config = replace(lbm_config, timestep=Timestep.EVEN) update_even = create_lb_update_rule(lbm_config=even_lbm_config, lbm_optimisation=lbm_optimisation) ast_kernel_even = ps.create_kernel(update_even, target=dh.default_target, cpu_openmp=True) kernel_even = ast_kernel_even.compile() if is_inplace(streaming_pattern): odd_lbm_config = replace(lbm_config, timestep=Timestep.ODD) update_odd = create_lb_update_rule(lbm_config=odd_lbm_config, lbm_optimisation=lbm_optimisation) ast_kernel_odd = ps.create_kernel(update_odd, target=dh.default_target, cpu_openmp=True) kernel_odd = ast_kernel_odd.compile() else: kernel_odd = kernel_even lb_kernels = [kernel_even, kernel_odd] # In[19]: bh = LatticeBoltzmannBoundaryHandling(method, dh, 'pdfs', streaming_pattern=streaming_pattern) moving_wall = UBB((0.05, 0)) wall = NoSlip("wall") bh.set_boundary(moving_wall, slice_from_direction('N', dim)) for direction in ('E', 'S', 'W'): bh.set_boundary(wall, slice_from_direction(direction, dim)) plt.figure(dpi=200) plt.boundary_handling(bh) # In[20]: if is_inplace(streaming_pattern): def timeloop(timeSteps): global current_timestep for i in range(timeSteps): bh(prev_timestep=current_timestep) current_timestep = current_timestep.next() dh.run_kernel(lb_kernels[current_timestep.idx]) else: def timeloop(timeSteps): global current_timestep for i in range(timeSteps): bh(prev_timestep=current_timestep) current_timestep = current_timestep.next() dh.run_kernel(lb_kernels[current_timestep.idx]) dh.swap("pdfs", "pdfs_tmp") # In[21]: if 'is_test_run' not in globals(): def run(): timeloop(100) return dh.gather_array('velField') animation = plt.vector_field_magnitude_animation(run, frames=300, rescale=True) set_display_mode('video') res = display_animation(animation) else: timeloop(10) res = None res