#!/usr/bin/env python # coding: utf-8 # # Illustrated Code # ## Building Software in a Literate Way # This notebook contains the code examples for Andreas Zeller's keynotes: # # * "Illustrated Code: Building Software in a Literate Way" at ASE 2021; and # * "Illustrated Code: What Software Engineering can Learn from Research Software" at SE 2022. # Go and # # * Read the [talk slides](https://www.slideshare.net/andreas.zeller/illustrated-code-ase-2021) # * [Interact](https://mybinder.org/v2/gh/uds-se/debuggingbook/HEAD?labpath=docs/notebooks/IllustratedCode.ipynb) with the notebook # ## Talk Abstract # # Notebooks – rich, interactive documents that join together code, documentation, and outputs – are all the rage with data scientists. But can they be used for actual software development? In this talk, I share experiences from authoring two interactive textbooks – fuzzingbook.org and debuggingbook.org – and show how notebooks not only serve for exploring and explaining code and data, but also how they can be used as software modules, integrating self-checking documentation, tests, and tutorials all in one place. The resulting software focuses on the essential, is well-documented, highly maintainable, easily extensible, and has a much higher shelf life than the "duct tape and wire" prototypes frequently found in research and beyond. # # Andreas Zeller is faculty at the # CISPA Helmholtz Center for Information Security and professor for Software Engineering at # Saarland University, both in Saarbrücken, Germany. # His research # on automated debugging, mining software archives, specification mining, and security testing has proven highly influential. # Zeller is an ACM Fellow and holds an ACM SIGSOFT Outstanding Research Award.
#
# ## Some Support Code

# In[1]:

import bookutils.setup

# In[2]:

from Tracer import Tracer

# In[3]:

from typing import Any, Callable
from types import FrameType
from inspect import signature, getmembers

# ### Sequence Diagrams with Mermaid

# In[4]:

class SequenceDiagramTracer(Tracer):
    """Record traced calls and returns as lines of a Mermaid sequence diagram.

    Meant to be used as a context manager; the collected lines are turned
    into Markdown by `_repr_markdown_()`.
    """

    def __init__(self, client='Client', server='Server'):
        """Set the participant names used for the caller and the callee."""
        super().__init__()
        self.lines = []  # Mermaid message lines, in the order events were seen
        self.client = client
        self.server = server

    def traceit(self, frame: FrameType, event: str, arg: Any):
        """Append one diagram line for a 'call' or 'return' event.

        NOTE(review): presumably invoked by `Tracer` for every trace event;
        `Tracer` is defined outside this file -- confirm.
        """
        if event == 'call':
            func = frame.f_code.co_name
            args = frame.f_locals
            # Arguments are listed in reverse order of `frame.f_locals`
            args_line = ", ".join(reversed([var + "=" + repr(args[var])
                                            for var in args]))
            line = f'{self.client}->>+{self.server}: {func}({args_line})'
            self.lines.append(line)

        if event == 'return':
            line = f'{self.server}-->>-{self.client}'
            if arg is not None:
                # Only label the return arrow if there is a value to show
                line += f': {repr(arg)}'
            self.lines.append(line)

    def _repr_markdown_(self) -> str:
        """Return the recorded lines as a fenced Mermaid sequence diagram."""
        return '\n'.join(['```mermaid'] +
                         ['sequenceDiagram'] +
                         self.lines +
                         ['```'])


# ### Class Diagrams with Mermaid

# In[5]:

class ClassDiagram():
    """Render the public members of a class as a Mermaid class diagram."""

    def __init__(self, cls):
        """Remember `cls`, the class to be drawn."""
        self.cls = cls

    def methods_str(self):
        """Return the diagram lines for all members not starting with `_`.

        Non-callable members come first as `-name = value`, followed by
        callable members as `+name(signature)`.
        """
        members = [(name, member) for (name, member) in getmembers(self.cls)
                   if not name.startswith('_')]
        attributes = '\n'.join([f' -{name} = {repr(member)}'
                                for (name, member) in members
                                if not callable(member)])
        # The " -> " of a return annotation is replaced by a plain space
        methods = '\n'.join([f' +{name}{str(signature(member)).replace(" -> ", " ")}'
                             for (name, member) in members
                             if callable(member)])
        return attributes + '\n' + methods

    def _repr_markdown_(self) -> str:
        """Return the diagram as a fenced Mermaid block titled after `self.cls`."""
        return '\n'.join([
            '',
            '```mermaid',
            'classDiagram',
            '    direction TD',
            # Use the name of the wrapped class instead of a fixed "Server"
            f'    class {self.cls.__name__} {{',
            self.methods_str(),
            '    }',
            '```',
        ])


# # Illustrated Code: Building Software in a Literate Way

# ### Jupyter Demo: Factorials
#
# We do a bit of Jupyter demo. Double-click on a cell to edit it. Press `Shift`+`Return` to execute/render it.

# `factorial(n)` computes the factorial of n, that is $n! = \prod_{i=1}^n i = 1 \times 2 \times \dots \times n$.
# In[6]:

def factorial(n: int) -> int:
    """Return the factorial of `n`; any `n` of 1 or less yields 1."""
    if n <= 1:
        return 1
    return n * factorial(n - 1)

# In[7]:

factorial(3)

# In[8]:

assert factorial(3) == 6

# ## Can Programming be Liberated from the Typewriter Style?

# We define a function `middle(x, y, z)` that returns the "middle" of three integers $x$, $y$, and $z$ – i.e. the one that is neither the maximum nor the minimum of the three.
#
# We show how to use notebooks to
#
# * document its interface
# * provide a specification
# * provide rationales and experiments
# * include tests
# * include architecture

# ## Interface

# Let us define an interface for `middle()`:

# In[9]:

def middle_i(x: int, y: int, z: int) -> int:
    """Return the middle of three numbers x, y, z"""
    # Interface only: there is deliberately no implementation here
    ...

# * Standard way of documenting things
# * No formal spec (what is "the middle" here?); no context; no rationale
# * No usage example
# * No implementation (yet)

# ## Specification
#
# Here's an (executable) specification of `middle()`:

# In[10]:

def middle_spec(x: int, y: int, z: int) -> int:
    """Executable specification: the second of x, y, z in sorted order."""
    _, median, _ = sorted([x, y, z])
    return median

# This specification is executable, so we can easily include examples:

# In[11]:

middle_spec(5, 3, 7)

# Or just write the examples as assertions, so we can use them as tests later:

# In[12]:

assert middle_spec(5, 4, 7) == 5

# Of course, your specification can also include all sorts of diagrams. (Install [jupyterlab-markup](https://opensourcelibs.com/lib/jupyterlab-markup) for this.)
# ```mermaid
# sequenceDiagram
#     Client->>+Server: middle(5, 4, 7)
#     Server-->>-Client: 5
# ```

# For the record, this diagram is created with five lines of Markdown:

# ```mermaid
# sequenceDiagram
#     Client->>+Server: middle(5, 4, 7)
#     Server-->>-Client: 5
# ```

# ## Implementation
#
# Let us now provide an efficient implementation for `middle()`:

# In[13]:

def middle(x: int, y: int, z: int) -> int:
    """Return whichever of x, y, z lies between the other two.

    Uses comparisons only. The statement layout is left as is on purpose:
    the coverage listing and the Z3 path encoding later in this document
    follow it line by line.
    """
    if y < z:
        if x < y:
            return y
        elif x < z:
            return x
    else:
        if x > y:
            return y
        elif x > z:
            return x
    return z

# Once written, this is executable:

# In[14]:

middle(5, 4, 1)

# Tests and results become part of the doc!

# ## Rationale
#
# Why do we implement `middle()` as above, rather than using the much shorter `middle_spec()` code? Because it is about _twice as fast_:

# In[15]:

import time
import random

# In[16]:

def middle_benchmark(middle_fun: Callable[[int, int, int], int],
                     n: int = 1000000) -> float:
    """Time `n` calls of `middle_fun` on random arguments.

    Every call gets three fresh integers drawn from 0..1000. Only the call
    itself is timed, not the drawing of its arguments. The result is the
    sum of all `time.perf_counter()` differences.
    """
    total = 0.0
    for _ in range(n):
        x, y, z = (random.randint(0, 1000) for _ in range(3))
        started = time.perf_counter()
        middle_fun(x, y, z)
        total += time.perf_counter() - started
    return total

# In[17]:

middle_elapsed = middle_benchmark(middle)
middle_elapsed

# In[18]:

middle_spec_elapsed = middle_benchmark(middle_spec)
middle_spec_elapsed

# The document can include all these experiments and their results as a rationale, as above

# The document can also discuss and evaluate more alternatives,
# reproducing the thoughts and experiments of the original programmer

# We can have the document check automatically whether the rationale holds:

# In[19]:

assert middle_elapsed < middle_spec_elapsed, "Inconsistent doc"
assert middle_elapsed * 1.2 < middle_spec_elapsed, "Inconsistent doc"

# This ensures consistency between text and code.
# ## Tests # Tests can be written as additional examples on how the code should work: # ### Regular Tests # In[20]: assert middle(1, 2, 3) == 2 assert middle(3, 1, 2) == 2 assert middle(2, 3, 1) == 2 # If a test fails, that's the same as an example failing. (And examples act as tests.) # One can analyze (and report) test performance, again in the document – for instance, measure the coverage of our code (`*` = line is covered during testing) # In[21]: from StatisticalDebugger import CoverageCollector, code_with_coverage # In[22]: with CoverageCollector() as c: assert middle(1, 2, 3) == 2 assert middle(3, 1, 2) == 2 assert middle(2, 3, 1) == 2 # In[23]: code_with_coverage(middle, c.coverage()) # It seems we need to add more tests to cover all lines: # In[24]: with c: assert middle(2, 1, 3) == 2 assert middle(3, 2, 1) == 2 # And we achieve 100% coverage: # In[25]: code_with_coverage(middle, c.coverage()) # Assumptions about coverage can be made in the document, too: # In[26]: assert len(c.coverage()) >= 11 # ### Check against Spec # One can check against the spec, again in the document. Here we compare `middle()` against `middle_spec()` with 100,000 random numbers. # In[27]: for i in range(100000): x = random.randint(0, 1000) y = random.randint(0, 1000) z = random.randint(0, 1000) assert middle(x, y, z) == middle_spec(x, y, z) # All these tests can be run (and debugged) right from the document. # ### Symbolic Verification # One can also include static checks or symbolic verification. 
# Here, we encode the path constraints from the `middle()` code for the Z3 constraint solver:

# In[28]:

from z3 import *

# In[29]:

s = Solver()  # Create a Z3 solver with four variables
x, y, z = Int('x'), Int('y'), Int('z')
m = Int('middle')

# In[30]:

# Encode the middle() constraints: one implication per path through the
# function, with the complete branch condition of that path as its premise
# and the value returned on that path as its conclusion.
s.add(Implies(And(y < z, x < y), m == y))
s.add(Implies(And(y < z, x >= y, x < z), m == x))
s.add(Implies(And(y < z, x >= y, x >= z), m == z))
s.add(Implies(And(y >= z, x > y), m == y))
s.add(Implies(And(y >= z, x <= y, x > z), m == x))
s.add(Implies(And(y >= z, x <= y, x <= z), m == z))

# We can actually prove correctness this way: Is it possible that `middle()` returns a wrong result? (no.)

# In[31]:

s.add(And(x < y, y < z, m != y))  # This shouldn't be possible
s.check()

# ## Architecture

# We can extract architecture diagrams such as a class diagram from code, always kept up to date:

# In[32]:

class Server():
    """Example class whose members are drawn by `ClassDiagram`."""

    state = 42

    # The functions below take no `self`, so they are declared static: their
    # signatures stay the same, and they can be called on instances as well.

    @staticmethod
    def middle(x: int, y: int, z: int) -> int:
        """Delegate to the module-level `middle()`."""
        return middle(x, y, z)

    @staticmethod
    def min(x: int, y: int, z: int) -> int:
        """Delegate to the built-in `min()`."""
        return min(x, y, z)

    @staticmethod
    def max(x: int, y: int, z: int) -> int:
        """Delegate to the built-in `max()`."""
        return max(x, y, z)

# In[33]:

ClassDiagram(Server)

# We can extract dynamic diagrams from executions:

# In[34]:

with SequenceDiagramTracer() as tracer:
    m = middle(30, 50, 20)
tracer

# One may even compare this diagram with the one in the specification and flag mismatches.

# ## Tutorial

# The document can contain instructions on how to run things. (Of course, these would be executable too, testing the tutorial.)

# To use `middle`, you need Python 3.9 or later. First install the `debuggingbook` module, available via the Python `pip` program:

# In[35]:

get_ipython().system('pip install --quiet debuggingbook')

# Within `debuggingbook`, the `StatisticalDebugger` provides a `middle()` function, but it is buggy (as it serves to demonstrate statistical debugging).
# A correct version is available as `middle_fixed()`, which you can import as `middle()` as follows:

# In[36]:

from debuggingbook.StatisticalDebugger import middle_fixed as middle

# In[37]:

middle(5, 4, 1)

# In[38]:

from z3 import *

# In[39]:

s = Solver()  # Create a Z3 solver with four variables
x, y, z = Int('x'), Int('y'), Int('z')
m = Int('middle')

# In[40]:

# Encode the middle() constraints: one implication per path through the
# function, with the complete branch condition of that path as its premise
# and the value returned on that path as its conclusion.
# NOTE(review): the paths are those of the `middle()` defined in In[13];
# `middle_fixed` is assumed to have the same branch structure -- confirm.
s.add(Implies(And(y < z, x < y), m == y))
s.add(Implies(And(y < z, x >= y, x < z), m == x))
s.add(Implies(And(y < z, x >= y, x >= z), m == z))
s.add(Implies(And(y >= z, x > y), m == y))
s.add(Implies(And(y >= z, x <= y, x > z), m == x))
s.add(Implies(And(y >= z, x <= y, x <= z), m == z))

# We can actually prove correctness this way: Is it possible that `middle()` returns a wrong result? (no.)

# In[41]:

s.add(And(x < y, y < z, m != y))  # This shouldn't be possible
s.check()

# ## Q&A
#
# The document can contain sections with questions and answers. These would be managed by the public, and continuously ensure consistency.
#
#
# * [What's wrong with middle()?](#What_s_wrong_with_middle()?)
# * [I get a syntax error](#I_get_a_syntax_error)
# * [Can we have a `largest()` and `smallest()` function too?](#Can_we_have_a_largest()_and_smallest()_function_too?)

# ### What's wrong with `middle()`?
#
# **Question.** I use `middle` from the `StatisticalDebugger` module. However, it doesn't seem to return the correct value. What am I doing wrong? -- novice@python.net

# In[42]:

from StatisticalDebugger import middle
middle(2, 1, 3)  # should be 2

# **Best Answer (+10).** You need to import `middle_fixed` instead. -- expert@debugging.com

# In[43]:

from StatisticalDebugger import middle_fixed as middle
middle(2, 1, 3)  # should be 2

# **Answer (+5)** Don't use `middle(x, y, z)` -- just write `sorted([x, y, z])[1]` -- say.no.to@libraries.com

# In[44]:

sorted([2, 1, 3])[1]

# **Comment (-2)** Actually, `sort()` is more efficient.
# -- cpluspluslover@programming.com

# In[45]:

# Left as posted: the reply below objects to exactly this one-line style
xyz = list([2, 1, 3]); xyz.sort(); xyz[1]

# **Comment (+2)** `sort()` takes three lines of code, whereas `sorted()` takes one. Also, avoid multiple statements on a line -- haskellfan@beautifulcode.org

# ### I get a syntax error
#
# **Question.** When I run the above `middle()` code, I get a message
# `SyntaxError: invalid syntax`. What am I doing wrong? -- appleuser@mac.com

# **Best Answer (+10).** Are you using Python 2.x? Type annotations work with Python 3.6 and later -- updates@python.org

# ### Can we have a `largest()` and `smallest()` function too?
#
# **Question.** How do I get the greatest (or smallest) of $x$, $y$, and $z$? -- novice@python.net

# **Best Answer (+10).** Try Python `max()` and `min()` -- guido@python.org

# In[46]:

max(2, 1, 3)  # should be 3

# In[47]:

min(2, 1, 3)  # should be 1

# ## More Drawing UML

# Install [jupyterlab-markup](https://opensourcelibs.com/lib/jupyterlab-markup)

# ```mermaid
# graph TD;
#     A-->B;
#     A-->C;
#     B-->D;
#     C-->D;
# ```

# In[48]:

from IPython.display import display, Markdown

# In[49]:

# The opening fence, each graph statement, and the closing fence go on lines
# of their own, as in the commented diagram above.
Markdown("""
```mermaid
graph TD;
    A-->B;
    B-->A;
    A-->C;
    B-->D;
    C-->D;
```""")

# ## More Resources
#
# * [I don't like notebooks](https://www.youtube.com/watch?v=7jiPeIFXb6U)
# * [JupyterBook](https://blog.jupyter.org/announcing-the-new-jupyter-book-cbf7aa8bc72e)
# * The [Grand Unified Theory of Documentation](https://documentation.divio.com):
#     * Tutorials
#     * How-To Guides
#     * Explanation
#     * Reference
# * [Eve: Programming for Humans](http://witheve.com)
# * [nbdev: Create Python Projects with Jupyter Notebooks](https://nbdev.fast.ai)

# Influences:
#
# * \TeX: The Program by Donald J. Knuth
# * Operating Systems: Design and Implementation by Andrew Tanenbaum

# Our goal: Have a _single document_ that encompasses
#
# * Code
# * Tests
# * Tutorial
# * How-To-Guides
# * Explanations
# * Reference
#
# in an _executable_ and _self-updating_ way.
# ## What "Illustrate" stands for # ### `I` is for `I`llustrate # # Every piece of code should come with an _example_ that _illustrates_ its usage. # ### `L` is for `L`avish # # Go beyond typewriter text. Make use of all media modern tools have to offer – diagrams! charts! videos! Tie these to your code and examples, such that they stay in sync. # ### `L` is for `L`og # # Do not just describe the final version. Discuss alternatives you have tried (and revoked, and why). This will be helpful for understanding later. # # Note down your ideas and plans during development + testing # # Also log during debugging. Keep track of your experiments and their results. Turn these into test cases later. # ### `U` is for `U`pdate # # Make your document the single place to be updated. # ### `S` is for `S`piralize # # Focus on the essentials first, and add details later. Give the reader moments where they can stay and recapitulate what they learned. Make these abstraction layers in your code, such that readers can choose what to use (and what to read, too!) Proceed step by step, illustrating one piece at a time. # ### `T` is for `T`utorial # # Allow your readers to learn what your code is about – from a usage perspective, but also from an implementation perspective. Use quizzes, tests, exercises. # ### `R` is for `R`eference # # Allow your readers to extract and study those parts they need – interfaces, specs. And, of course, allow them to _use_ your code. # ### `A` is for `A`ssist # # Have others contribute to your tutorial – e.g. by providing recipes for specific how-to questions # Think of StackOverflow, but constantly tested and updated # # StackOverflow is filled with how-to questions. Yet, many of these are outdated over time. Allow for your readers to ask questions (with code that will be executed and tested as part of your work). Make your work a community effort! # ### `T` is for `T`est # # Having detailed examples should automatically lead to full coverage of your code.
# Have assertions on top – to explain what is going on, but also to ensure consistency between code and text. # # Save previous outputs of your examples; get notified when things change. Great way to do regression testing! # ### `E` is for `E`ase # # Your job is coding, teaching, and testing – all in one place # # Ease the life of future readers (which may include you) # # Ease the life of people who want to use your code