#|hide
#default_exp clean
from nbdev.showdoc import show_doc

#|export
import io,sys,json,glob,re
from fastcore.script import call_parse,Param,bool_arg
from fastcore.utils import ifnone
from nbdev.imports import Config
from nbdev.export import nbglob
from pathlib import Path

#|hide
#For tests only
from copy import deepcopy
from nbdev.imports import *

#|export
def rm_execution_count(o):
    "Remove execution count in `o`"
    # The key is kept (set to None) rather than deleted.
    if 'execution_count' in o: o['execution_count'] = None

#|export
colab_json = "application/vnd.google.colaboratory.intrinsic+json"

def clean_output_data_vnd(o):
    "Remove `application/vnd.google.colaboratory.intrinsic+json` in data entries"
    if 'data' not in o: return
    data = o['data']
    if colab_json in data:
        # Rebuild the mapping without the colab entry instead of mutating it in place.
        o['data'] = {k:v for k,v in data.items() if k != colab_json}

#|export
def clean_cell_output(cell):
    "Remove execution counts, colab intrinsic data and `tags` from the outputs of `cell`"
    if 'outputs' not in cell: return
    for o in cell['outputs']:
        rm_execution_count(o)
        clean_output_data_vnd(o)
        # `tags` is popped from the output's metadata when present, else from the output itself.
        o.get('metadata', o).pop('tags', None)

#|export
cell_metadata_keep = ["hide_input"]
nb_metadata_keep = ["kernelspec", "jekyll", "jupytext", "doc"]

#|export
def clean_cell(cell, clear_all=False):
    "Clean `cell` by removing superfluous metadata or everything except the input if `clear_all`"
    rm_execution_count(cell)
    if 'outputs' in cell:
        if clear_all: cell['outputs'] = []
        else: clean_cell_output(cell)
    # A source made of a single empty string is normalised to an empty list.
    if cell['source'] == ['']: cell['source'] = []
    cell['metadata'] = {} if clear_all else {k:v for k,v in cell['metadata'].items() if k in cell_metadata_keep}

tst = {'cell_type': 'code',
       'execution_count': 26,
       'metadata': {'hide_input': True, 'meta': 23},
       'outputs': [{'execution_count': 2,
                    'data': {
                        'application/vnd.google.colaboratory.intrinsic+json': {
                            'type': 'string'},
                        'plain/text': ['sample output',]
                    },
                    'output': 'super'}],
       'source': 'awesome_code'}
# Deep copy so that cleaning `tst` cannot leak into `tst1` through shared nested dicts.
tst1 = deepcopy(tst)

clean_cell(tst)
test_eq(tst, {'cell_type': 'code',
              'execution_count': None,
              'metadata': {'hide_input': True},
              'outputs': [{'execution_count': None,
                           'data': {'plain/text': ['sample output',]},
                           'output': 'super'}],
              'source': 'awesome_code'})

clean_cell(tst1, clear_all=True)
test_eq(tst1, {'cell_type': 'code',
               'execution_count': None,
               'metadata': {},
               'outputs': [],
               'source': 'awesome_code'})

tst2 = {
    'metadata': {'tags':[]},
    'outputs': [{
        'metadata': {
            'tags':[]
        }}],
    "source": [
        ""
    ]}
clean_cell(tst2, clear_all=False)
test_eq(tst2, {
    'metadata': {},
    'outputs': [{
        'metadata':{}}],
    'source': []})

#|export
def clean_nb(nb, clear_all=False):
    "Clean `nb` from superfluous metadata, passing `clear_all` to `clean_cell`"
    for c in nb['cells']: clean_cell(c, clear_all=clear_all)
    nb['metadata'] = {k:v for k,v in nb['metadata'].items() if k in nb_metadata_keep }

tst = {'cell_type': 'code',
       'execution_count': 26,
       'metadata': {'hide_input': True, 'meta': 23},
       'outputs': [{'execution_count': 2,
                    'data': {
                        'application/vnd.google.colaboratory.intrinsic+json': {
                            'type': 'string'},
                        'plain/text': ['sample output',]
                    },
                    'output': 'super'}],
       'source': 'awesome_code'}
nb = {'metadata': {'kernelspec': 'some_spec', 'jekyll': 'some_meta', 'meta': 37},
      'cells': [tst]}

clean_nb(nb)
test_eq(nb['cells'][0], {'cell_type': 'code',
                         'execution_count': None,
                         'metadata': {'hide_input': True},
                         'outputs': [{'execution_count': None,
                                      'data': { 'plain/text': ['sample output',]},
                                      'output': 'super'}],
                         'source': 'awesome_code'})
test_eq(nb['metadata'], {'kernelspec': 'some_spec', 'jekyll': 'some_meta'})

#|export
def _print_output(nb):
    "Print `nb` in stdout for git things"
    x = json.dumps(nb, sort_keys=True, indent=1, ensure_ascii=False)
    _output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    try:
        _output_stream.write(x)
        _output_stream.write("\n")
        _output_stream.flush()
    finally:
        # Detach so that finalising this temporary wrapper does not close
        # `sys.stdout.buffer`; otherwise a later call (or any later print) fails.
        _output_stream.detach()

#|export
@call_parse
def nbdev_clean_nbs(
    fname:str=None, # A notebook name or glob to convert
    clear_all:bool_arg=False, # Clean all metadata and outputs
    disp:bool_arg=False,  # Print the cleaned outputs
    read_input_stream:bool_arg=False # Read input stream and not nb folder
):
    "Clean all notebooks in `fname` to avoid merge conflicts"
    #Git hooks will pass the notebooks in the stdin
    if read_input_stream and sys.stdin:
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
        try: nb = json.load(input_stream)
        # Detach so the temporary wrapper does not close `sys.stdin.buffer` when finalised.
        finally: input_stream.detach()
        clean_nb(nb, clear_all=clear_all)
        _print_output(nb)
        return

    path = None
    if fname is None:
        # Best effort: fall back to the current directory when the configured path is unavailable.
        # NOTE(review): the exported imports cell brings in `Config` but not `get_config`;
        # if `get_config` is undefined at runtime the NameError is swallowed here too — confirm the import.
        try: path = get_config().path("nbs_path")
        except Exception: path = Path.cwd()

    files = nbglob(fname=ifnone(fname,path))
    for f in files:
        if not str(f).endswith('.ipynb'): continue
        # Context manager so the read handle is closed before the file is rewritten.
        with open(f, 'r', encoding='utf-8') as src: nb = json.load(src)
        clean_nb(nb, clear_all=clear_all)
        if disp: _print_output(nb)
        else:
            x = json.dumps(nb, sort_keys=True, indent=1, ensure_ascii=False)
            # Distinct handle name: the loop variable `f` is no longer shadowed.
            with io.open(f, 'w', encoding='utf-8') as dest:
                dest.write(x)
                dest.write("\n")

#|hide
from nbdev.export import notebook2script
notebook2script()