#export
import io,sys,json,glob
from fastscript import call_parse,Param
from nbdev.imports import Config
from pathlib import Path
# default_exp clean
#hide
#For tests only
from nbdev.imports import *
Strip notebooks from superfluous metadata
To avoid pointless conflicts while working with Jupyter notebooks (with different execution counts or cell metadata), it is recommended to clean the notebooks before committing anything (done automatically if you install the git hooks with `nbdev_install_git_hooks`). The following functions are used to do that.
# export
def rm_execution_count(o):
    "Reset the `execution_count` entry of `o` to `None` when `o` has one"
    # Objects without the key are left untouched (the key is never added).
    if 'execution_count' not in o:
        return
    o['execution_count'] = None
# export
def clean_cell_output(cell):
    "Reset the execution count of every output stored in `cell`"
    # Cells without an 'outputs' entry have nothing to reset.
    if 'outputs' not in cell:
        return
    for output in cell['outputs']:
        rm_execution_count(output)
#export
# Cell metadata keys that survive a (non `clear_all`) clean; every other key is dropped.
cell_metadata_keep = [
    "hide_input",
]
# Notebook-level metadata keys that survive a clean; every other key is dropped.
nb_metadata_keep = [
    "kernelspec",
    "jekyll",
    "jupytext",
    "doc",
]
# export
def clean_cell(cell, clear_all=False):
    "Clean `cell` by removing superfluous metadata, or everything except the input if `clear_all`"
    rm_execution_count(cell)
    if clear_all:
        # Full clean: empty the outputs (only if the cell has any) and drop all metadata.
        if 'outputs' in cell:
            cell['outputs'] = []
        cell['metadata'] = {}
        return
    # Regular clean: keep the outputs but reset their execution counts...
    if 'outputs' in cell:
        clean_cell_output(cell)
    # ...and keep only the metadata keys listed in `cell_metadata_keep`.
    kept = {}
    for key, value in cell['metadata'].items():
        if key in cell_metadata_keep:
            kept[key] = value
    cell['metadata'] = kept
# Sample code cell: an execution count, one metadata key to keep, one to drop, one output.
tst = dict(
    cell_type='code',
    execution_count=26,
    metadata={'hide_input': True, 'meta': 23},
    outputs=[{'execution_count': 2, 'output': 'super'}],
    source='awesome_code',
)
# Shallow copy, cleaned separately below with `clear_all=True`.
tst1 = dict(tst)

# Default clean: execution counts are reset and only the kept metadata key remains.
clean_cell(tst)
test_eq(tst, dict(
    cell_type='code',
    execution_count=None,
    metadata={'hide_input': True},
    outputs=[{'execution_count': None, 'output': 'super'}],
    source='awesome_code',
))

# Full clean: metadata and outputs are emptied as well.
clean_cell(tst1, clear_all=True)
test_eq(tst1, dict(
    cell_type='code',
    execution_count=None,
    metadata={},
    outputs=[],
    source='awesome_code',
))
# export
def clean_nb(nb, clear_all=False):
    "Clean `nb` from superfluous metadata, passing `clear_all` to `clean_cell`"
    for cell in nb['cells']:
        clean_cell(cell, clear_all=clear_all)
    # Notebook-level metadata is filtered the same way whatever `clear_all` is:
    # only the keys listed in `nb_metadata_keep` are preserved.
    kept = {}
    for key, value in nb['metadata'].items():
        if key in nb_metadata_keep:
            kept[key] = value
    nb['metadata'] = kept
# A one-cell notebook carrying both kept and superfluous metadata.
tst = dict(
    cell_type='code',
    execution_count=26,
    metadata={'hide_input': True, 'meta': 23},
    outputs=[{'execution_count': 2, 'output': 'super'}],
    source='awesome_code',
)
nb = dict(
    metadata={'kernelspec': 'some_spec', 'jekyll': 'some_meta', 'meta': 37},
    cells=[tst],
)
clean_nb(nb)

# The cell is cleaned the same way `clean_cell` cleans it by default...
test_eq(nb['cells'][0], dict(
    cell_type='code',
    execution_count=None,
    metadata={'hide_input': True},
    outputs=[{'execution_count': None, 'output': 'super'}],
    source='awesome_code',
))
# ...and the notebook metadata only retains the keys listed in `nb_metadata_keep`.
test_eq(nb['metadata'], {'kernelspec': 'some_spec', 'jekyll': 'some_meta'})
#export
import io,sys,json
# export
def _print_output(nb):
    "Write `nb` to stdout as key-sorted, indented UTF-8 JSON followed by a newline (for git things)"
    # Wrap the raw stdout buffer so the output is always UTF-8 encoded.
    _output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    try:
        x = json.dumps(nb, sort_keys=True, indent=1, ensure_ascii=False)
        _output_stream.write(x)
        _output_stream.write("\n")
        _output_stream.flush()
    finally:
        # Without detaching, garbage-collecting the wrapper closes `sys.stdout.buffer`,
        # which breaks every later write to stdout (including a second call to this function).
        _output_stream.detach()
# export
# Two modes:
#  * `read_input_stream` truthy (and stdin available): read ONE notebook as JSON from stdin,
#    clean it, print it on stdout and return without touching any file.
#  * otherwise: clean every `.ipynb` file matched by `fname` (a name or glob); when `fname`
#    is None, all notebooks found recursively under `Config().nbs_path` are used, falling back
#    to the current working directory if the config cannot be read. Each cleaned notebook is
#    printed when `disp` is set, else written back in place.
@call_parse
def nbdev_clean_nbs(fname:Param("A notebook name or glob to convert", str)=None,
                    clear_all:Param("Clean all metadata and outputs", bool)=False,
                    disp:Param("Print the cleaned outputs", bool)=False,
                    read_input_stream:Param("Read input stram and not nb folder")=False):
    "Clean all notebooks in `fname` to avoid merge conflicts"
    #Git hooks will pass the notebooks in the stdin
    if read_input_stream and sys.stdin:
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
        nb = json.load(input_stream)
        clean_nb(nb, clear_all=clear_all)
        _print_output(nb)
        return
    if fname is None:
        # Best effort: any failure to load the project config falls back to the cwd.
        try: path = Config().nbs_path
        except Exception: path = Path.cwd()
        files = path.glob('**/*.ipynb')
    else:
        files = glob.glob(fname)
    for nb_path in files:
        if not str(nb_path).endswith('.ipynb'): continue
        # Read inside a context manager so the handle is closed before the file is rewritten.
        with open(nb_path, 'r', encoding='utf-8') as src:
            nb = json.load(src)
        clean_nb(nb, clear_all=clear_all)
        if disp:
            _print_output(nb)
            continue
        x = json.dumps(nb, sort_keys=True, indent=1, ensure_ascii=False)
        # A distinct name for the handle keeps the loop variable bound to the path.
        with open(nb_path, 'w', encoding='utf-8') as dst:
            dst.write(x)
            dst.write("\n")
By default (`fname` left to `None`), all the notebooks in `nbs_path` (or in the current working directory if the config cannot be loaded) are cleaned. You can opt in to fully clean the notebook by removing every bit of metadata and the cell outputs by passing `clear_all=True`. `disp` is only used internally with git hooks and will print the clean notebook instead of saving it. Same for `read_input_stream`, which will read the notebook from the input stream instead of the file names.
#hide
# Runs nbdev's notebook conversion; per the recorded output below it reports one
# "Converted <name>.ipynb" entry per notebook.
# NOTE(review): presumably this exports the `#export` cells to the library modules — confirm in `nbdev.export`.
from nbdev.export import *
notebook2script()
Converted 00_export.ipynb. Converted 01_sync.ipynb. Converted 02_showdoc.ipynb. Converted 03_export2html.ipynb. Converted 04_test.ipynb. Converted 05_merge.ipynb. Converted 06_cli.ipynb. Converted 07_clean.ipynb. Converted 99_search.ipynb. Converted index.ipynb. Converted tutorial.ipynb.