#!/usr/bin/env python
# coding: utf-8

# Accera logo
#
#
# # Accera Quickstart Example
#
# In this example, we will:
#
# * Implement matrix multiplication with a ReLU activation (matmul + ReLU), commonly used in machine learning algorithms
# * Generate two implementations: a naive algorithm and one with loop transformations
# * Compare the timings of both implementations

# ### Setup
#
# First, we'll install Accera using `pip`.
#
# #### Optional: if running this notebook locally
#
# * Linux/macOS: install gcc using `apt install gcc`.
# * Windows: install Microsoft Visual Studio and run `vcvars64.bat` to set up Visual Studio tools in your `PATH` before starting the Jupyter environment.

# In[ ]:


import subprocess
import sys

# Invoke pip through the interpreter that is running this file. Unlike
# `get_ipython().system(...)`, this also works when the file is executed as a
# plain script (where `get_ipython` is not defined). `check=False` keeps the
# step non-fatal: if the install fails, the `import accera` below reports it.
subprocess.run([sys.executable, "-m", "pip", "install", "accera"], check=False)


# ### Build
#
# Run the code below to implement `ReLU(C + A @ B)` on arrays `A`, `B`, and `C`.
#
# We'll build a package called `"hello_accera"` that will export both versions as C functions.

# In[ ]:


import accera as acc

# define placeholder inputs/output
A = acc.Array(role=acc.Array.Role.INPUT, shape=(512, 512))
B = acc.Array(role=acc.Array.Role.INPUT, shape=(512, 512))
C = acc.Array(role=acc.Array.Role.INPUT_OUTPUT, shape=(512, 512))

# implement the logic for matmul and relu
matmul = acc.Nest(shape=(512, 512, 512))
i1, j1, k1 = matmul.get_indices()


# The function name is irrelevant: the decorator hands the body to the nest,
# so reusing `_` for the second nest below is intentional.
@matmul.iteration_logic
def _():
    C[i1, j1] += A[i1, k1] * B[k1, j1]


relu = acc.Nest(shape=(512, 512))
i2, j2 = relu.get_indices()


@relu.iteration_logic
def _():
    C[i2, j2] = acc.max(C[i2, j2], 0.0)


package = acc.Package()

# fuse the i and j indices of matmul and relu, add to the package
schedule = acc.fuse(matmul.create_schedule(), relu.create_schedule(), partial=2)
package.add(schedule, args=(A, B, C), base_name="matmul_relu_fusion_naive")

# transform the schedule, add to the package
# NOTE: the same `schedule` object is transformed after the naive version has
# been added above; only the `plan` created from it below is added as the
# transformed function.
f, i, j, k = schedule.get_indices()
ii, jj = schedule.tile((i, j), (16, 16))  # loop tiling
schedule.reorder(j, i, f, k, jj, ii)  # loop reordering
plan = schedule.create_plan()
plan.unroll(ii)  # loop unrolling
package.add(plan, args=(A, B, C), base_name="matmul_relu_fusion_transformed")

# build a dynamically-linked package (a .dll or .so) that exports both functions
print(package.build(name="hello_accera", format=acc.Package.Format.HAT_DYNAMIC))


# ### Benchmark
#
# In the previous section, we built a binary (.so) and a header file (.hat).
#
# Next, we will load the package and compare the timings of both implementations.

# In[ ]:


import hatlib as hat
import numpy as np

# load the package
hat_package = hat.load("hello_accera.hat")

# call one of the functions with test inputs
A_test = np.random.rand(512, 512).astype(np.float32)
B_test = np.random.rand(512, 512).astype(np.float32)
C_test = np.zeros((512, 512)).astype(np.float32)

# C is declared INPUT_OUTPUT, so the call below writes its result into C_test;
# compute the NumPy reference from the untouched inputs first.
C_numpy = np.maximum(C_test + A_test @ B_test, 0.0)

matmul_relu = hat_package["matmul_relu_fusion_transformed"]
matmul_relu(A_test, B_test, C_test)

# check correctness
np.testing.assert_allclose(C_test, C_numpy, atol=1e-3)

# benchmark all functions
hat.run_benchmark("hello_accera.hat", batch_size=5, min_time_in_sec=5)


# ### Next Steps
#
# The [Manual](https://microsoft.github.io/Accera/Manual/00%20Introduction/) is a good place to start for an introduction to the Accera Python programming model.
#
# In particular, the [schedule transformations](https://microsoft.github.io/Accera/Manual/03%20Schedules/#schedule-transformations) describe how you can experiment with different loop transformations with just a few lines of Python.
#
# Finally, the `.hat` format is just a C header file containing metadata. Learn more about the [HAT format](https://github.com/microsoft/hat) and [benchmarking](https://github.com/microsoft/hat/tree/main/tools).
#
#
# ## How it works
#
# In a nutshell, Accera takes the Python code that defines the loop schedule and algorithm and converts it into [MLIR](https://mlir.llvm.org/) intermediate representation (IR). Accera's compiler then takes this IR through a series of MLIR pipelines to perform transformations. The result is a binary library with a C header file. The library implements the algorithms that are defined in Python, and is compatible with the target.
#
# To peek into the stages of IR transformation that Accera does, try replacing `format=acc.Package.Format.HAT_DYNAMIC` with `format=acc.Package.Format.MLIR_DYNAMIC` in `quickstart.py`, re-run the script, and search the `_tmp` subfolder for the intermediate `*.mlir` files. We plan to document these IR constructs in the future.
#
#
# ## Documentation
# Get to know Accera by reading the [Documentation](https://microsoft.github.io/Accera/).
#
# You can find more step-by-step examples in the [Tutorials](https://microsoft.github.io/Accera/Tutorials).