#!/usr/bin/env python # coding: utf-8 # ![Banner](../media/banner2.png) # # --- # # Workshop 2.1: Jupyter Notebooks Advanced # # * **Contributors**: # * Ashwin Patil (@ashwinpatil) # * Luis Francisco Monge Martinez (@LuckyLuke) # * Ian Hellen (@ianhellen) #

# * **Agenda**: # * [Jupyter is not just Python](#notjustpython) # * [Jupyter Kernels & Python environments](#kernels) # * [Magics](#magics) # * [Widgets introduction](#widgets) # * [Jupyter Extensions](#extensions) # * [Export and create notebooks](#nbconvert) # * [Dev topics - Debugging and testing notebook code](#debugging) #

# * **Notebook**: [https://aka.ms/Jupyterthon-ws-2-1](https://aka.ms/Jupyterthon-ws-2-1) # * **License**: [Creative Commons Attribution-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-sa/4.0/) # # * **Q&A** - OTR Discord **#Jupyterthon #WORKSHOP DAY 2 - JUPYTER ADVANCED** # --- # # # Jupyter is not just Python [Ashwin] # - Powershell kernel # - R kernel # --- # # # Jupyter Kernels & Python environments # # Python environments let you create "isolated" installations with independent versions of packages. # # This is usually **A VERY GOOD IDEA**! # # Linux # # ```bash # python -m venv MyNewEnv # source ./MyNewEnv/bin/activate # pip install msticpy # ``` # # Windows # # ```cmd # python -m venv MyNewEnv # .\MyNewEnv\Scripts\activate # pip install msticpy # ``` # # Conda # # ```bash # conda create -n MyNewCondaEnv # conda activate MyNewCondaEnv # conda install pip # pip install msticpy # ``` # # ## Using different Python Kernels with Jupyter # # Note: VSCode seems to be able to use Python or Conda environments anyway but installing a dedicated ipykernel is needed for debugging. # # ```bash # python -m ipykernel install --user --name MyNewCondaEnv --display-name "Python3 (MyNewCondaEnv)" # ``` # ![Kernels1](../media/JLab_kernels1.png) # ![Kernels2](../media/JLab_kernels2.png) # ### To remove unwanted kernels # # ``` # jupyter kernelspec remove KERNELNAME # # ``` # # Example # # ``` # (base) e:\src\test>jupyter kernelspec list # [ListKernelSpecs] WARNING | Config option `kernel_spec_manager_class` not recognized by `ListKernelSpecs`. 
# Available kernels: # bhconda C:\Users\Ian\AppData\Roaming\jupyter\kernels\bhconda # bluehound C:\Users\Ian\AppData\Roaming\jupyter\kernels\bluehound # condadev C:\Users\Ian\AppData\Roaming\jupyter\kernels\condadev # mynewcondaenv C:\Users\Ian\AppData\Roaming\jupyter\kernels\mynewcondaenv # python3 C:\Users\Ian\AppData\Roaming\jupyter\kernels\python3 # xpython F:\anaconda\share\jupyter\kernels\xpython # # # (base) e:\src\test>jupyter kernelspec remove mynewcondaenv # [RemoveKernelSpec] WARNING | Config option `kernel_spec_manager_class` not recognized by `RemoveKernelSpec`. # Kernel specs to remove: # mynewcondaenv C:\Users\Ian\AppData\Roaming\jupyter\kernels\mynewcondaenv # Remove 1 kernel specs [y/N]: y # [RemoveKernelSpec] Removed C:\Users\Ian\AppData\Roaming\jupyter\kernels\mynewcondaenv # ``` # # Remove the environment if you don't need it # # Python venv - just delete the venv folder # # Conda # ``` # conda remove --all -n MyNewCondaEnv # ``` # --- # # # Magics [Ian] # # [https://ipython.readthedocs.io/en/stable/interactive/magics.html](https://ipython.readthedocs.io/en/stable/interactive/magics.html) # # ## What are they? # # Magics are a kind of macro/function that allows you to invoke functionality # of the notebook or OS independent of the kernel language. # # ### Line magics - single % # - Only operate on the arguments on the remainder of the line # - Can be mixed with other code # # ### Cell magics - double %% # - Operate on whole cell contents # - Must be in their own cell and at the start of the cell (even comments!) # # ## Popular magics - #

# %magic %env %writefile %js %html %pip %logstart #

# # %magic - lists all magic functions (LONG!) # # %logstart log_file - very useful if you are prone to deleting/overwriting your code and then regret it # # %pdb, %tb and %xmode covered in later section # ### Get or set environment variables #

# %env #

# In[5]:


# Look up a single environment variable through the %env line magic.
get_ipython().run_line_magic("env", "HOME")


# In[7]:


# %load ./test_mod.py
import sys

# Report the interpreter version and the platform this kernel runs on.
print(sys.version_info)
print(sys.platform)


# ### Run pip
#

# %pip #

#
# Always use this rather than !pip

# In[12]:


get_ipython().run_line_magic("pip", "show pandas")


# In[ ]:


# Exported from an unexecuted cell containing a bare `%pip`.
get_ipython().run_line_magic("pip", "")


# In[8]:


get_ipython().run_line_magic("run", "test_mod.py")


# In[4]:


import math

# Largest of the first ten powers of pi (pi**0 .. pi**9).
max(math.pow(math.pi, x) for x in range(10))


# In[5]:


# Time the same expression with the %timeit line magic.
get_ipython().run_line_magic("timeit", "max((math.pow(math.pi, x) for x in range(10)))")


# In[14]:


# NOTE(review): the cell body was split across several physical lines in this
# export, which left an unterminated string literal. It looks like the HTML
# tags that wrapped the text were stripped - restore them from the original
# notebook if available. Only the surviving text is kept here.
get_ipython().run_cell_magic("html", "", "\nHello Jupyterthon!\n\n")


# ### Write (or append) the contents of a cell to a file
#
# ** Note - cell magic! **
#

# %%writefile file_name
# %%writefile -a file_name #

#

# In[15]:


# %%writefile with -a appends the cell body to test_mod.py.
get_ipython().run_cell_magic("writefile", "-a test_mod.py", "\nprint(sys.platform)\n")


# ### Run a Python script
#

# %run py_file_name #

#

# In[16]:


get_ipython().run_line_magic("run", "test_mod.py")


# ## Invoking shell commands
#
# Prefix with !
#
# These are not magics - they directly invoke underlying OS commands.
#
# Like line magics, can use these mixed with other code

# In[17]:


get_ipython().system("dir")


# In[18]:


# Capture the command output in a Python variable instead of just showing it.
my_folder = get_ipython().getoutput("dir")
print(f"Captured {len(my_folder)} lines:\n", my_folder)


# ## Creating Magics

# In[19]:


from IPython.core.magic import register_line_magic

# Also available: register_cell_magic for cell magics, and
# register_line_cell_magic for a magic that works with both.


@register_line_magic
def ian_is(line):
    """Return "Ian is " followed by the words of *line*, each capitalized.

    Words are split on whitespace and re-joined with single spaces.
    """
    capitalized_words = [word.capitalize() for word in line.split()]
    return "Ian is " + " ".join(capitalized_words)


# Remove the plain function name from the namespace; the next cell still
# invokes the registered magic as %ian_is.
del ian_is


# In[20]:


get_ipython().run_line_magic("ian_is", "a fan of Python")


# ### Magic example

# In[21]:


# NOTE(review): presumably this import is what makes the %%ioc cell magic
# used below available - confirm against the msticpy documentation.
import msticpy


# In[22]:


# The cell body is tab-separated indicator text pasted from a threat-intel
# page; it is split one record per source line here, content unchanged.
get_ipython().run_cell_magic(
    "ioc",
    "",
    "\nTYPE\nINDICATOR\nROLE\nTITLE\nADDED\nACTIVE\nRELATED PULSES\n"
    "URL\thttp://av-quiz.tk/wp-content/k6K/\t\t\tNov 16, 2021, 11:20:26 AM\t\t2\t\n"
    "IPv4\t94.177.248.64\t\t\tNov 16, 2021, 11:20:26 AM\t\t8\t\n"
    "IPv4\t92.207.181.106\t\t\tNov 16, 2021, 11:20:26 AM\t\t2\t\n"
    "IPv4\t81.0.236.93\t\t\tNov 16, 2021, 11:20:26 AM\t\t126\t\n"
    "IPv4\t51.75.33.120\t\t\tNov 16, 2021, 11:20:26 AM\t\t265\t\n"
    "FileHash-SHA256\tf7a4da96129e9c9708a005ee28e4a46af092275af36e3afd63ff201633c70285\t\t\tNov 16, 2021, 11:20:26 AM\t\t3\t\n"
    "FileHash-SHA256\td95125b9b82df0734b6bc27c426d42dea895c642f2f6516132c80f896be6cf32\t\t\tNov 16, 2021, 11:20:26 AM\t\t3\t\n"
    "FileHash-SHA256\tbd9b8fe173935ad51f14abc16ed6a5bf6ee92ec4f45fd2ae1154dd2f727fb245\t\t\tNov 16, 2021, 11:20:26 AM\t\t3\t\n"
    "FileHash-SHA256\tb95a6218777e110578fa017ac14b33bf968ca9c57af7e99bd5843b78813f46e0\t\t\tNov 16, 2021, 11:20:26 AM\t\t2\t\n"
    "FileHash-SHA256\t9c345ee65032ec38e1a29bf6b645cde468e3ded2e87b0c9c4a93c517d465e70d\t\t\tNov 16, 2021, 11:20:26 AM\t\t2\t\n",
)


# ----
#
# # Widgets introduction [Luis]
# Interactive HTML widgets for Jupyter Notebooks and IPython kernel.
# Easy way to avoid input errors, type mismatches, date format errors...
#

# In[1]:


# It's necessary to select an ipykernel to work with ipywidgets.
# Uses the %pip magic rather than !pip, as this notebook recommends.
get_ipython().run_line_magic("pip", "show ipykernel")


# In[2]:


import ipywidgets as widgets


# ### Integer Slider

# In[3]:


w = widgets.IntSlider()
display(w)


# In[4]:


# Setting the value from code moves the slider displayed above.
w.value = 89


# ### Integer Range Slider

# In[5]:


widgets.IntRangeSlider(value=[5, 7], min=0, max=10)


# ### Integer Progress Bar

# In[6]:


p = widgets.IntProgress(
    value=0,
    min=0,
    max=9,
    description="Loading:",
    bar_style="",  # 'success', 'info', 'warning', 'danger' or ''
    style={"bar_color": "maroon"},
    orientation="horizontal",
)


# In[7]:


import time

from IPython.display import Markdown

display(p)
# Advance the bar once per second; switch its colour to green past the midpoint.
for x in range(10):
    p.value = x
    time.sleep(1)
    if x > 4:
        p.style.bar_color = "green"
p.close()
display(Markdown("***Finished!***"))


# ### Dropdown

# In[8]:


# Options are (label, value) pairs; `value` selects the initial entry.
widgets.Dropdown(
    options=[("One", 1), ("Two", 2), ("Three", 3)],
    value=2,
    description="Number:",
)


# ### Multiselector

# In[9]:


sm = widgets.SelectMultiple(
    options=["Option1", "Option2", "Option3"],
    # rows=10,
    description="Modules",
    disabled=False,
)
display(sm)


# In[13]:


sm.value


# ### Date Picker

# In[11]:


widgets.DatePicker(
    description="Pick a Date",
    disabled=False,
)


# ### File Uploader

# In[14]:


fu = widgets.FileUpload(
    accept="",  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
    multiple=False,  # True to accept multiple files upload else False
)
display(fu)


# ### More sophisticated file/folder chooser
# [ipyfilechooser Project](https://github.com/crahan/ipyfilechooser)

# In[15]:


get_ipython().run_line_magic("pip", "install ipyfilechooser")


# In[16]:


from ipyfilechooser import FileChooser

fc = FileChooser()
# fc.show_only_dirs = True
fc.show_hidden = True
fc.use_dir_icons = True
fc.title = "Input folder Path"
display(fc)


# MSTICPy also includes a number of advanced widgets. You can find out more about them in the workshop session on MSTICPy later today.
# ---
#
# # Jupyter Extensions [Luis]
#
# Extensions are client-specific; most work only with classic Jupyter. In this section we will talk about JupyterLab extensions.
# Fundamentally, JupyterLab is designed as an extensible environment. JupyterLab extensions can customize or enhance any part of JupyterLab. They can provide new themes, file viewers and editors, or renderers for rich outputs in notebooks. Extensions can add items to the menu or command palette, keyboard shortcuts, or settings in the settings system. Extensions can provide an API for other extensions to use and can depend on other extensions. In fact, the whole of JupyterLab itself is simply a collection of extensions that are no more powerful or privileged than any custom extension.

# ### Creating config file
# This file will be used to keep extension configurations.
# File will be created in '~/.jupyter/jupyter_lab_config.py'

# In[ ]:


get_ipython().system("jupyter lab --generate-config")


# ### JupyterLab System Monitor
# JupyterLab extension to display system information (memory and cpu usage). [Project](https://github.com/jtpio/jupyterlab-system-monitor)

# In[ ]:


# Installs go through the %pip magic rather than !pip, as this notebook recommends.
get_ipython().run_line_magic("pip", "install jupyterlab-system-monitor")


# Add these lines to the config file.
# ```
# # amount of memory expressed in bytes
# c.ResourceUseDisplay.mem_limit = 8564768768
# c.ResourceUseDisplay.track_cpu_percent = True
# c.ResourceUseDisplay.cpu_limit = 8
# ```

# ![Sysmonitor](https://github.com/jtpio/jupyterlab-system-monitor/raw/main/doc/screencast.gif)

# ### Git
# A JupyterLab extension for version control using Git. [Project](https://github.com/jupyterlab/jupyterlab-git)

# In[ ]:


get_ipython().run_line_magic("pip", "install jupyterlab-git")


# ![Git](https://raw.githubusercontent.com/jupyterlab/jupyterlab-git/master/docs/figs/preview.gif)

# ### JupyterLab Templates
# Support for jupyter notebook templates in jupyterlab. [Project](https://github.com/jpmorganchase/jupyterlab_templates)

# In[ ]:


get_ipython().run_line_magic("pip", "install jupyterlab_templates")
get_ipython().system("jupyter labextension install jupyterlab_templates")
get_ipython().system("jupyter serverextension enable --py jupyterlab_templates")


# Add these lines to the config file.
# ```
# c.JupyterLabTemplates.template_dirs = ['list', 'of', 'template', 'directories']
# c.JupyterLabTemplates.include_default = True
# c.JupyterLabTemplates.include_core_paths = True
# ```
# **Tip**: Templates must be placed in a subfolder of one of the configured template directories.

# ![Templates](https://raw.githubusercontent.com/jpmorganchase/jupyterlab_templates/main/docs/example1.gif)

# ### Code Snippets (Elyra)
# The ability to reuse pieces of code allows users to avoid doing repetitive work, making the programming workflow more simple and productive. Elyra supports custom code snippets that can be added to the file editor. [Project](https://elyra.readthedocs.io/en/latest/getting_started/overview.html#reusable-code-snippets)

# In[ ]:


get_ipython().run_line_magic("pip", "install elyra-code-snippet-extension")
get_ipython().run_line_magic("pip", 'install -U "nbclassic>=0.2.8"')
get_ipython().system("jupyter lab build")


# ![Snippets example](https://elyra.readthedocs.io/en/latest/_images/code-snippet-expanded.png)

# # Export and create notebooks

# ## NBFormat - Create a notebook programmatically [Roberto]
#
# * Jupyter notebook files are simple JSON documents, containing text, source code, rich media output, and metadata.
# * Each segment of the document is stored in a cell.
# * We can use the [nbformat](https://nbformat.readthedocs.io/en/latest/api.html) Python APIs to create notebook markdown and code cells.
#
# **Create a Notebook Object**
# * Import nbformat library
# * Create a new notebook object
# * Initialize notebook cells as an empty list

# In[10]:


import nbformat as nbf

nb = nbf.v4.new_notebook()
nb["cells"] = []


# **Create a Markdown Cell**
# * Use the [nbformat.v4.new_markdown_cell API](https://nbformat.readthedocs.io/en/latest/api.html#nbformat.v4.new_markdown_cell) to create a new markdown cell
# * Append the results to the notebooks cells list

# In[11]:


nb["cells"].append(nbf.v4.new_markdown_cell("# Remote Service Creation"))
nb["cells"]


# **Create a Code Cell**
# * Use the [nbformat.v4.new_code_cell API](https://nbformat.readthedocs.io/en/latest/api.html#nbformat.v4.new_code_cell) to create a new code cell
# * Append the results to the notebooks cells list

# In[12]:


# The cell source is two statements, so they must sit on separate lines
# for the generated code cell to be valid Python.
nb["cells"].append(
    nbf.v4.new_code_cell(
        """from openhunt.mordorutils import *
spark = get_spark()"""
    )
)
nb["cells"]


# **Write Notebook File**
# Use the [nbformat.write API](https://nbformat.readthedocs.io/en/latest/api.html#nbformat.write) to write the notebook object to a file.

# In[ ]:


nbf.write(nb, "test.ipynb")


# ![](../media/day2/nbformat-write-notebook.png)

# **Examples: Document research and detection logic in notebooks programmatically**
# * [An interactive Book over the Threat Hunter Playbook](https://medium.com/threat-hunters-forge/writing-an-interactive-book-over-the-threat-hunter-playbook-with-the-help-of-the-jupyter-book-3ff37a3123c7)
# * [Jupyter Notebooks 📓 from SIGMA Rules 🛡⚔️ to Query Elasticsearch 🏹](https://medium.com/threat-hunters-forge/jupyter-notebooks-from-sigma-rules-%EF%B8%8F-to-query-elasticsearch-31a74cc59b99)
# * [Security Datasets project: YAML -> Notebooks](https://github.com/OTRF/Security-Datasets)
#
# ## NBConvert - Exporting and converting to other formats [Ian]
#
# ### From the command line
#
#

# jupyter nbconvert --to FORMAT input_notebook.ipynb #

#

# In[37]:


get_ipython().system("jupyter nbconvert --to RST day2-1-Jupyter-advanced-topics.ipynb")


# ## In code

# In[23]:


import nbformat

# Import notebook into structured format with nbformat
our_notebook = nbformat.read("day2-1-Jupyter-advanced-topics.ipynb", as_version=4)
our_notebook.cells[0]


# ### Convert a notebook to HTML

# In[32]:


# Import the exporter
from nbconvert import HTMLExporter, PythonExporter

# Instantiate the exporter
html_exporter = HTMLExporter()
html_exporter.template_name = "classic"

# Convert the notebook
body, resources = html_exporter.from_notebook_node(our_notebook)

# Preview the first 200 characters of the generated HTML.
print(body[:200])

out_file = "day2-1-Jupyter-advanced-topics.html"
with open(out_file, "w", encoding="utf-8") as nb_file:
    nb_file.write(body)


# ### Convert to Python module

# In[35]:


# rst_exporter = RSTExporter()
py_exporter = PythonExporter()

# rst_text, _ = rst_exporter.from_notebook_node(our_notebook)
py_text, _ = py_exporter.from_notebook_node(our_notebook)

py_out_file = "day2-1-Jupyter-advanced-topics.py"
with open(py_out_file, "w", encoding="utf-8") as nb_file:
    nb_file.write(py_text)


# ---
# # Dev topics - Debugging and testing notebook code [Ian]
#
# ## Magics and errors – traceback, xmode, debug
#

# In[45]:


# Bad code example
def bad_func(param1, param2):
    """What could possibly go wrong."""
    # Fails with TypeError when the operands do not support `+` together.
    return param1 + param2


def func_in_middle(*args):
    """It's not my problem"""
    # Pure pass-through: adds one more frame to the traceback.
    return bad_func(*args)


def hapless():
    """I'm just hoping for the best."""
    print(func_in_middle(1, 2))
    print(func_in_middle("Hello", "World"))
    # Deliberate failure: str + int raises TypeError inside bad_func.
    print(func_in_middle("Hello", 1))


hapless()


# ### Use %tb to review last traceback
#

# %tb #

#

# In[40]:


# Re-display the traceback of the most recent exception.
get_ipython().run_line_magic("tb", "")


# ### Use `%xmode` magic to include parameter values
#
#

# %xmode { Verbose | Context | Plain | Minimal } #

#

# In[41]:


get_ipython().run_line_magic("xmode", "verbose")
get_ipython().run_line_magic("tb", "")


# In[42]:


get_ipython().run_line_magic("xmode", "context")
get_ipython().run_line_magic("tb", "")


# ### Exceptions within Exceptions

# In[43]:


def func_in_middle2(*args):
    """It's not my problem but let me try to fix things"""
    try:
        return bad_func(*args)
    except TypeError:
        # Deliberately naive recovery: str.join itself raises TypeError when
        # an argument is not a string, producing an exception raised while
        # handling another exception.
        return "".join(args)
    except Exception as err:
        raise RuntimeError("Something terrible happened") from err


def hapless2():
    """I'm just hoping for the best."""
    print(func_in_middle(1, 2))
    print(func_in_middle("Hello", "World"))
    print(func_in_middle2("Hello", 1))


hapless2()


# ## Debugging bare-handed

# In[62]:


get_ipython().run_cell_magic("debug", "", "hapless()\n")


# ## Debugging from a comfy chair

# In[46]:


hapless()


# ## Running Jupyter notebooks in a unit test [Ian]
#
# ### Why? - Quick and dirty testing
#
# Caveats
# - Only tests happy path
# - (Obviously) only works if it's a non-interactive notebook
#
# Good for:
# - Quick coverage - esp if you been manually testing in a notebook
# - Lazy programmers
# - People with lots of notebooks to test
#
# The code to run a notebook from code.
#

# In[50]:


import os

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor, CellExecutionError

nb_path = "../data/broken_notebook.ipynb"


def test_notebook():
    """Execute the notebook at ``nb_path`` and fail if any cell raises.

    The notebook is run with a ``python3`` kernel and a 600 second timeout,
    using ``../data`` as the working directory. If a cell fails, the
    partially executed notebook is saved next to the original with an
    ``-err`` suffix and a short message naming both files is printed.

    Raises:
        CellExecutionError: re-raised after the ``-err`` notebook is written.
    """
    output_path = "../data"
    # Read as UTF-8 explicitly, matching the encoding used for the
    # error-notebook write below, so the platform default is never used.
    with open(nb_path, encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)
    ep = ExecutePreprocessor(timeout=600, kernel_name="python3")

    try:
        ep.preprocess(nb, {"metadata": {"path": output_path}})
    except CellExecutionError:
        # Insert "-err" before the extension only. A plain str.replace would
        # rewrite every ".ipynb" in the path and, if the suffix were missing,
        # yield the input path itself and overwrite the source notebook.
        root, ext = os.path.splitext(str(nb_path))
        nb_err = f"{root}-err{ext}"
        msg = f"Error executing the notebook '{nb_path}'.\n"
        msg += f"See notebook '{nb_err}' for the traceback."
        print(msg)
        with open(nb_err, mode="w", encoding="utf-8") as f:
            nbformat.write(nb, f)
        raise


test_notebook()


#
# ### Output when test fails
#
# ```
# if not cell_allows_errors:
# > raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content)
# E nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell:
# E ------------------
# E if not iplocation.settings.args.get("AuthKey") and not ips_key.value:
# E raise ValueError("No Authentication key in config/environment or supplied by user.")
# E if ips_key.value:
# E iplocation = IPStackLookup(api_key=ips_key.value)
# E loc_result, ip_entity = iplocation.lookup_ip(ip_address='90.156.201.97')
# E print('Raw result')
# E display(loc_result)
# E
# E print('IP Address Entity')
# E display(ip_entity[0])
# E ------------------
# E
# E ---------------------------------------------------------------------------
# E IndexError Traceback (most recent call last)
# E in
# E 8
# E 9 print('IP Address Entity')
# E ---> 10 display(ip_entity[0])
# E
# E IndexError: list index out of range
# E IndexError: list index out of range
#
# /opt/hostedtoolcache/Python/3.6.15/x64/lib/python3.6/site-packages/nbclient/client.py:765: CellExecutionError
# ----------------------------- Captured stdout call -----------------------------
# Error executing the notebook 'docs/notebooks/GeoIPLookups.ipynb'.
# See notebook 'docs/notebooks/GeoIPLookups-err.ipynb' for the traceback.
# ```

# ---
#
# End of Session
# # Break: 5 Minutes
#
# ![](../media/dog-leash-break.jpg)