#|default_exp processors
Some processors for NBProcessor
#|export
import ast
from nbdev.read import *
from nbdev.imports import *
from nbdev.process import *
from nbdev.showdoc import *
from nbdev.doclinks import *
from execnb.nbio import *
from execnb.shell import *
from fastcore.imports import *
from fastcore.xtras import *
import sys
#|hide
from fastcore.test import *
#|hide
_test_file = '../tests/docs_test.ipynb'  # notebook fixture processed by `_run_procs` in the tests below
On this page we'll be using this private helper to process a notebook and return the results, to simplify testing:
def _run_procs(procs=None, preprocs=None, postprocs=None, return_nb=False, path=_test_file):
    "Process the notebook at `path` and return either the notebook or its cells rendered as a string"
    processor = NBProcessor(path, procs, preprocs=preprocs, postprocs=postprocs)
    processor.process()
    return processor.nb if return_nb else '\n'.join(map(str, processor.nb.cells))
#|export
def nbflags_(nbp, cell, *args):
    "Store the `nbflags` directive's arguments on the notebook as `_nbflags`"
    # The previous docstring ("Hide cell from output") was copied from `hide_` and
    # described the wrong behavior: this processor records notebook-level flags.
    nbp.nb._nbflags = args
# `nbflags_` should capture the flags declared in the test notebook
_proc = NBProcessor('../tests/01_everything.ipynb', nbflags_)
_proc.process()
test_eq(_proc.nb._nbflags, ('skip_showdoc', 'foobar'))
#|export
def cell_lang(cell):
    "Language of `cell`, falling back to 'python' when no `metadata.language` is set"
    return nested_attr(cell, 'metadata.language', 'python')
def add_links(cell):
    "Add links to markdown cells"
    nl = NbdevLookup()
    # Linkify recognized symbol references in the markdown source itself
    if cell.cell_type == 'markdown': cell.source = nl.linkify(cell.source)
    # Also linkify markdown *outputs* line by line
    # NOTE(review): outputs are attr-dict-like, so `hasattr(o['data'], 'text/markdown')`
    # effectively tests for the 'text/markdown' key -- relies on execnb AttrDict behavior
    for o in cell.get('outputs', []):
        if hasattr(o, 'data') and hasattr(o['data'], 'text/markdown'):
            o.data['text/markdown'] = [nl.link_line(s) for s in o.data['text/markdown']]
# `add_links` should linkify known symbols but leave unknown names untouched
_linked = _run_procs(add_links)
assert "[numpy.array](https://numpy.org/doc/stable/reference/generated/numpy.array.html#numpy.array)" in _linked
assert "[ModuleMaker](https://nbdev.fast.ai/maker#ModuleMaker) but not a link to `foobar`." in _linked
assert "A link in a docstring: [ModuleMaker](https://nbdev.fast.ai/maker#ModuleMaker)" in _linked
assert "And not a link to <code>dict2nb</code>." in _linked
Gets rid of ANSI color codes that are streamed to standard output, which can interfere with static site generators:
#|export
_re_ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
def strip_ansi(cell):
    "Strip Ansi Characters."
    for out in cell.get('outputs', []):
        if out.get('name') == 'stdout':
            out['text'] = [_re_ansi_escape.sub('', line) for line in out.text]
# No ANSI escapes should survive processing
_stripped = _run_procs(strip_ansi)
assert not _re_ansi_escape.findall(_stripped)
#|export
def strip_hidden_metadata(cell):
    '''Strips "hidden" metadata property from code cells so it doesn't interfere with docs rendering'''
    if cell.cell_type != 'code' or 'metadata' not in cell: return
    cell.metadata.pop('hidden', None)
#|export
def hide_(nbp, cell):
    "Hide cell from output"
    cell.pop('source')
_hidden = _run_procs(hide_)
assert 'you will not be able to see this cell at all either' not in _hidden
#|export
def _re_hideline(lang=None): return re.compile(fr'{langs[lang]}\|\s*hide_line\s*$', re.MULTILINE)
def hide_line(cell):
    "Hide lines of code in code cells with the directive `hide_line` at the end of a line of code"
    pat = _re_hideline(cell_lang(cell))
    if cell.cell_type == 'code' and pat.search(cell.source):
        cell.source = '\n'.join(line for line in cell.source.splitlines() if not pat.search(line))
_nolines = _run_procs(hide_line)
assert r"def show():\n a = 2\n b = 3" not in _nolines
assert r"def show():\n a = 2" in _nolines
#|export
def filter_stream_(nbp, cell, *words):
    "Remove output lines containing any of `words` in `cell` stream output"
    if not words: return
    pat = '|'.join(words)
    for out in cell.get('outputs', []):
        if out.output_type == 'stream':
            out['text'] = [line for line in out.text if not re.search(pat, line)]
_filtered = _run_procs(filter_stream_)
exp=r"'A line\n', 'Another line.\n'"
assert exp in _filtered
#|export
_magics_pattern = re.compile(r'^\s*(%%|%).*', re.MULTILINE)
def clean_magics(cell):
    "A preprocessor to remove cell magic commands"
    if cell.cell_type != 'code': return
    cell.source = _magics_pattern.sub('', cell.source).strip()
_no_magics = _run_procs(clean_magics)
assert "%%" not in _no_magics
#|export
_langs = 'bash|html|javascript|js|latex|markdown|perl|ruby|sh|svg'
_lang_pattern = re.compile(rf'^\s*%%\s*({_langs})\s*$', flags=re.MULTILINE)
def lang_identify(cell):
    "A preprocessor to identify bash/js/etc cells and mark them appropriately"
    if cell.cell_type != 'code': return
    found = _lang_pattern.findall(cell.source)
    if found: cell.metadata.language = found[0]
When we issue a shell command in a notebook with `!`, we need to change the code-fence from `python` to `bash` and remove the `!`:
_langres = _run_procs(lang_identify)
assert "'language': 'bash'" in _langres
#|export
_re_hdr_dash = re.compile(r'^#+\s+.*\s+-\s*$', re.MULTILINE)
def rm_header_dash(cell):
    "Remove headings that end with a dash -"
    if not cell.source: return
    src = cell.source.strip()
    if cell.cell_type == 'markdown' and src.startswith('#') and src.endswith(' -'): del(cell['source'])
_nodash = _run_procs(rm_header_dash)
assert 'some words' in _nodash
assert 'A heading to Hide' not in _nodash
assert 'Yet another heading to hide' not in _nodash
#|export
_hide_dirs = {'export','exporti', 'hide','default_exp'}
def rm_export(cell):
    "Remove cells that are exported or hidden"
    dirs = cell.directives_
    if dirs and dirs.keys() & _hide_dirs: del(cell['source'])
_noexp = _run_procs(rm_export)
assert 'dontshow' not in _noexp
#|export
_re_showdoc = re.compile(r'^show_doc', re.MULTILINE)
def _is_showdoc(cell): return cell['cell_type'] == 'code' and _re_showdoc.search(cell.source)
def clean_show_doc(cell):
    "Remove ShowDoc input cells"
    if _is_showdoc(cell): cell.source = '#| echo: false\n' + cell.source
#|export
# AST node types that indicate an import statement
_imps = {ast.Import, ast.ImportFrom}
def _show_docs(trees):
    # Expression statements whose call target is named `show_doc`
    return [t for t in trees if isinstance(t,ast.Expr) and nested_attr(t, 'value.func.id')=='show_doc']
# Directives whose cells must be executed so their definitions exist for `show_doc`
_show_dirs = {'export','exports'}
def _do_eval(cell):
    # Decide whether `cell` must be executed to produce `show_doc` output.
    # Only python code cells with parseable source are candidates.
    if cell_lang(cell) != 'python': return
    trees = cell.parsed_()
    if cell.cell_type != 'code' or not trees: return
    # An explicit `eval: false` directive opts the cell out of execution
    if cell.directives_.get('eval:', [''])[0].lower() == 'false': return
    # Execute exported cells and cells containing imports (they may define names show_doc needs)
    if cell.directives_.keys() & _show_dirs or filter_ex(trees, risinstance(_imps)): return True
    # And of course execute cells that call show_doc themselves
    if _show_docs(trees): return True
#|export
class exec_show_docs:
    "Execute cells needed for `show_docs` output, including exported cells and imports"
    def __init__(self, nb):
        # One capturing shell per notebook; pre-import show_doc for python notebooks
        self.k = CaptureShell()
        if nb_lang(nb) == 'python': self.k.run_cell('from nbdev.showdoc import show_doc')
    def __call__(self, cell):
        # Notebook-level flags (set by `nbflags_`) can disable execution entirely
        flags = getattr(cell.nb, '_nbflags', [])
        if 'skip_showdoc' in flags: return
        if _do_eval(cell): self.k.cell(cell)
        # NOTE(review): `k.exc` appears to hold exception info with the instance at index 1 --
        # re-raise with the failing cell's index and source for a useful error message
        if self.k.exc: raise Exception(f'Error: cell {cell.idx_}:\n{cell.source}') from self.k.exc[1]
_docs_res = _run_procs(exec_show_docs)
assert _docs_res
#|export
def populate_language(nb):
    "Insert cell language indicator based on notebook metadata. You should use this before `lang_identify`"
    # Fixed docstring typo ("should to use"); the notebook-level language applies to every code cell,
    # so look it up once instead of per cell
    lang = nb_lang(nb)
    for cell in nb.cells:
        if cell.cell_type == 'code': cell.metadata.language = lang
#|hide
_lang_nb = _run_procs(preprocs=[populate_language], return_nb=True)
assert set(L(_lang_nb.cells).attrgot('metadata').attrgot('language').filter()) == {'python'}
#|hide
# integration test with hide_line
_apl = _run_procs(hide_line, preprocs=[populate_language], path='../tests/APL.ipynb')
assert 'hide_line' not in _apl
#| export
def insert_warning(nb):
    "Insert Autogenerated Warning Into Notebook after the first cell."
    warning = "<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->"
    nb.cells.insert(1, mk_cell(warning, 'markdown'))
This preprocessor inserts a warning in the markdown destination that the file is autogenerated. This warning is inserted in the second cell so we do not interfere with front matter.
res = _run_procs(preprocs=[insert_warning])
assert "<!-- WARNING: THIS FILE WAS AUTOGENERATED!" in res
# NOTE(review): the two lines below look like leftover scratch cells -- the filter
# result is discarded and `_tstre` is never used again; consider removing them
L('foo', None, 'a').filter(lambda x:x == 1)
_tstre = re.compile('a')
#|export
_def_types = (ast.FunctionDef,ast.AsyncFunctionDef,ast.ClassDef)
def _def_names(cell, shown):
    "Public def/class names in `cell` that aren't already in `shown`"
    return [showdoc_nm(o) for o in concat(cell.parsed_())
            if isinstance(o,_def_types) and o.name not in shown and o.name[0]!='_']
def _get_nm(tree):
    "Name of the object passed to a `show_doc` call"
    arg = tree.value.args[0]
    if hasattr(arg, 'id'): val = arg.id
    else: val = try_attrs(arg.value, 'id', 'func', 'attr')
    return f'{val}.{arg.attr}' if isinstance(arg, ast.Attribute) else arg.id
#|export
def add_show_docs(nb):
    "Add show_doc cells after exported cells, unless they are already documented"
    def _want(c):
        # Only non-empty code cells marked `export`/`exports` need generated docs
        return c.source and c.cell_type=='code' and ('export' in c.directives_ or 'exports' in c.directives_)
    exports = L(cell for cell in nb.cells if _want(cell))
    trees = nb.cells.map(NbCell.parsed_).concat()
    # Names already documented by an explicit show_doc call anywhere in the notebook
    shown_docs = {_get_nm(t) for t in _show_docs(trees)}
    # Iterate in reverse so insertions don't shift the `idx_` of cells not yet processed
    for cell in reversed(exports):
        if cell_lang(cell) != 'python':
            raise ValueError(f'{cell.metadata.language} cell attempted export:\n{cell.source}')
        for nm in _def_names(cell, shown_docs):
            nb.cells.insert(cell.idx_+1, mk_cell(f'show_doc({nm})'))
_sd_res = _run_procs(preprocs=[populate_language, add_show_docs])
assert "show_doc(some_func)'" in _sd_res
assert "show_doc(and_another)'" in _sd_res
assert "show_doc(another_func)'" not in _sd_res
#|hide
# this test makes sure @patch works
_sd_nb = _run_procs(preprocs=[populate_language, add_show_docs], return_nb=True, path='../tests/showdoc_test.ipynb')
assert r'show_doc(Foo.a_method)' in L(_sd_nb.cells).attrgot('source')
#| export
_re_title = re.compile(r'^#\s+(.*)[\n\r]+(?:^>\s+(.*))?', flags=re.MULTILINE)
_re_fm = re.compile(r'^---.*\S+.*---', flags=re.DOTALL)
_re_defaultexp = re.compile(r'^\s*#\|\s*default_exp\s+(\S+)', flags=re.MULTILINE)
def _celltyp(nb, cell_type):
    "Cells of `nb` with the given `cell_type`"
    return nb.cells.filter(lambda c: c.cell_type == cell_type)
def is_frontmatter(nb):
    "Raw cells of `nb` containing front matter"
    return _celltyp(nb, 'raw').filter(lambda c: _re_fm.search(c.get('source', '')))
def _istitle(cell):
    "Does `cell` start with a markdown H1 title?"
    src = cell.get('source', '')
    return bool(_re_title.search(src)) if src else False
#|export
def _default_exp(nb):
    "get the default_exp from a notebook"
    srcs = nb.cells.filter(lambda c: c.cell_type == 'code').attrgot('source')
    found = first(srcs.filter().map(_re_defaultexp.search).filter())
    return found.group(1) if found else None
_exp_nb = read_nb('../tests/docs_test.ipynb')
test_eq(_default_exp(_exp_nb), 'foobar')
#|export
def nb_fmdict(nb, remove=True):
    "Infer the front matter from a notebook's markdown formatting"
    # The first markdown cell that starts with an H1 holds the title/description/flags
    md_cells = _celltyp(nb, 'markdown').filter(_istitle)
    if not md_cells: return {}
    cell = md_cells[0]
    title,desc=_re_title.match(cell.source).groups()
    if not title: return {}
    # Bulleted `key: value` lines below the title become extra front-matter entries.
    # Raw string fixes the invalid `\s` escape in the previous non-raw literal.
    flags = re.findall(r'^-\s+(.*)', cell.source, flags=re.MULTILINE)
    flags = [s.split(':', 1) for s in flags if ':' in s] if flags else []
    flags = merge({k:v for k,v in flags if k and v},
                  {'title':title}, {'description':desc} if desc else {})
    # Remove the title cell so its contents aren't duplicated in the rendered docs
    if remove: cell['source'] = None
    return flags
_fm_nb = read_nb('../tests/docs_test.ipynb')
_fm = nb_fmdict(_fm_nb)
test_eq(_fm, dict(key1=' value1', key2=' value2', categories=' [c1, c2]', title='a title', description='A description'))
#|hide
_fm_nb2 = read_nb('../tests/directives.ipynb')
test_eq(nb_fmdict(_fm_nb2), {})
#|export
DEFAULT_FM_KEYS = ['title', 'description', 'author', 'image', 'categories', 'output-file', 'aliases']
def construct_fm(fmdict:dict, keys = DEFAULT_FM_KEYS):
    "construct front matter from a dictionary, but only for `keys`"
    if not fmdict: return None
    lines = [f"{k}: {fmdict[k]}" for k in keys if k in fmdict]
    return '---\n' + '\n'.join(lines) + '\n---'
_fm_dict = nb_fmdict(read_nb('../tests/docs_test.ipynb'))
_fm_str = construct_fm(_fm_dict)
test_eq(len(_fm_str.splitlines()), 5)
print(_fm_str)
---
title: a title
description: A description
categories: [c1, c2]
---
#|export
def insert_frontmatter(nb, fm_dict:dict, filter_keys:list=DEFAULT_FM_KEYS):
    "Add frontmatter into notebook based on `filter_keys` that exist in `fmdict`."
    fm = construct_fm(fm_dict, keys=filter_keys)
    if not fm: return
    nb.cells.insert(0, NbCell(0, dict(cell_type='raw', metadata={}, source=fm, directives_={})))
#|export
def infer_frontmatter(nb):
    "Insert front matter if it doesn't exist automatically from nbdev styled markdown."
    if is_frontmatter(nb): return
    exp = _default_exp(nb)
    fmdict = merge(nb_fmdict(nb), {'output-file': exp+'.html'} if exp else {})
    if 'title' in fmdict: insert_frontmatter(nb, fm_dict=fmdict)
_plain = _run_procs()
_withfm = _run_procs(postprocs=infer_frontmatter)
assert '# a title' in _plain and '# a title' not in _withfm
assert r'description: A description\n' in _withfm
assert r'categories: [c1, c2]\n' in _withfm
assert r'output-file: foobar.html\n---' in _withfm
#|eval: false
#|hide
# Export this notebook's `#|export` cells to the library module
from nbdev.doclinks import nbdev_export
nbdev_export()