#default_exp asciidoc
API for the fastdoc converter
#export
from fastdoc.imports import *
from fastcore.script import *
from warnings import warn
#export
def markdown_cell(md):
    "Build a markdown notebook cell whose source is `md` and whose metadata is empty"
    fields = {'cell_type': 'markdown', 'source': md, 'metadata': {}}
    return nbformat.notebooknode.NotebookNode(fields)
#export
def code_cell(code, metadata=None, outputs=None):
    "Build an unexecuted code cell from `code`; `metadata` and `outputs` default to empty containers"
    if metadata is None: metadata = {}
    if outputs is None: outputs = []
    fields = {'cell_type': 'code',
              'execution_count': None,
              'source': code,
              'metadata': metadata,
              'outputs': outputs}
    return nbformat.notebooknode.NotebookNode(fields)
Removing cells with the flag # hide
#export
_re_hidden = re.compile(r'^\s*#\s*(hide|clean)\s*$', re.MULTILINE)
#export
def remove_hidden_cells(cells):
    "Drop every cell whose source has a line holding only a `#hide` or `#clean` flag"
    kept = []
    for cell in cells:
        if _re_hidden.search(cell['source']) is not None: continue
        kept.append(cell)
    return kept
cells = [code_cell('# hide'), code_cell('lalala'), markdown_cell('lalala\n# hide')]
test_eq(remove_hidden_cells(cells), [code_cell('lalala')])
Isolate the blocks fenced with triple backticks and annotated with asciidoc: they are moved to code cells without outputs so that they are not interpreted by the converter. The flag ##clear## is added
so that the post-processing removes the [python]
flag.
#export
def isolate_adoc_blocks(cells):
    "Split markdown cells on their asciidoc fences; each fenced body becomes a `##clear##` code cell"
    out = []
    for cell in cells:
        is_md = cell['cell_type'] == 'markdown'
        if not (is_md and re.search(r'```\s*asciidoc', cell['source']) is not None):
            # Nothing to isolate: keep the cell untouched
            out.append(cell)
            continue
        lines = cell['source'].split('\n')
        inside,start = False,0
        for n,line in enumerate(lines):
            if not inside and re.search(r'^```\s*asciidoc\s*$', line) is not None:
                # Opening fence: flush the markdown gathered so far
                out.append(markdown_cell('\n'.join(lines[start:n])))
                inside,start = True,n+1
            elif inside and re.search(r'^```\s*$', line) is not None:
                # Closing fence: the fenced lines go to a flagged code cell
                out.append(code_cell('##clear##' + '\n'.join(lines[start:n])))
                inside,start = False,n+1
        assert not inside, f"Triple-quote asciidoc block not ended in {cell['source']}"
        out.append(markdown_cell('\n'.join(lines[start:])))
    return out
test = """This is some text
```asciidoc
This should be isolated
```
Some other text
```asciidoc
This should also be isolated
```
end"""
test_eq(isolate_adoc_blocks([markdown_cell(test)]), [
markdown_cell("This is some text"),
code_cell("##clear##This should be isolated"),
markdown_cell("Some other text"),
code_cell("##clear##This should also be isolated"),
markdown_cell("end")
])
Old way of putting [WARNING]
, [NOTE]
or [TIP]
#export
#TODO: remove when all notebooks have been ported to v2
def replace_old_jekylls(cell):
    "Turn a legacy `jekyll_xxx('message')` call cell into a `##clear##` asciidoc admonition block"
    src = cell['source']
    if not src.startswith('jekyll'): return cell
    kind_pat = re.compile(r"""jekyll_(.*)\(['"].*""")
    msg_pat = re.compile(r"""jekyll_.*\(['"]+([\s\S]*[^'"])['"]+\)$""")
    kind = kind_pat.match(src).group(1)
    body = msg_pat.match(src).group(1)
    labels = {'warn':'WARNING', 'note':'NOTE', 'important':'TIP'}
    # The cell is reset: no metadata, no outputs, only the asciidoc block as source
    cell['metadata'] = {}
    cell['source'] = f'##clear##[{labels[kind]}]\n====\n{body}\n===='
    cell['outputs'] = []
    return cell
test_eq(replace_old_jekylls(code_cell('jekyll_warn("""Try to convert me!""")')),
code_cell('##clear##[WARNING]\n====\nTry to convert me!\n===='))
Hide input of cells with hide_input=True
in metadata (extension hide input) or a flag #hide_input
. Put ##remove##
instead of the code that will be removed during post-processing
#export
_re_hide_input = re.compile(r'^\s*#\s*hide_input\s*$', re.MULTILINE)
#export
def hide_input(cell):
    "Replace the source by `##remove##` when the metadata or a `#hide_input` flag asks to hide the input"
    flagged_in_meta = cell['metadata'].get('hide_input', False)
    if flagged_in_meta or _re_hide_input.search(cell["source"]) is not None:
        cell['source'] = '##remove##'
    return cell
test_eq(hide_input(code_cell('some code', metadata={'hide_input': True}, outputs=[1])),
code_cell('##remove##', metadata={'hide_input': True}, outputs=[1]))
test_eq(hide_input(code_cell('# hide_input\nsome code', outputs=[1])),
code_cell('##remove##', outputs=[1]))
Hide outputs of cells with collapsed=True
in their metadata or a flag #hide_output
#export
_re_hide_output = re.compile(r'^\s*#\s*hide_output\s*$', re.MULTILINE)
#export
def hide_output(cell):
    "Empty the outputs (and drop the flag line) of cells that are collapsed or flagged `#hide_output`"
    collapsed = cell['metadata'].get('collapsed', False)
    if not collapsed and _re_hide_output.search(cell["source"]) is None: return cell
    cell['outputs'] = []
    cell['source'] = re.sub(r'#\s*hide_output\s*\n', '', cell['source'])
    return cell
test_eq(hide_output(code_cell('some code', metadata={'collapsed': True}, outputs=[1])),
code_cell('some code', metadata={'collapsed': True}))
test_eq(hide_output(code_cell('# hide_output\nsome code', outputs=[1])),
code_cell('some code'))
Replace outputs as text_html
by text_plain
(otherwise they are not kept)
#export
def extract_html(cell):
    "Move the `text/html` payload of every output under the `text/plain` key"
    for out in cell['outputs']:
        if 'data' not in out: continue
        data = out['data']
        if 'text/html' in data: data['text/plain'] = data.pop('text/html')
    return cell
test_eq(extract_html(code_cell('some code', outputs=[{'data': {'text/html': 'some_html'}}])),
code_cell('some code', outputs=[{'data': {'text/plain': 'some_html'}}]))
Deal with errors by putting them in plain text
#export
def split_max_len(text, l):
    """Wrap `text` on spaces into lines of at most `l` characters.

    Every word is prefixed by a space, so each returned line starts with one.
    A single word longer than `l` is kept whole on its own line.
    """
    line,lines = "",[]
    for word in text.split(' '):
        if len(line) + len(word) + 1 <= l: line += f' {word}'
        else:
            # Flush the full line and start the next one with the overflowing
            # word, so that no word is lost
            if line: lines.append(line)
            line = f' {word}'
    if len(line) > 0: lines.append(line)
    return "\n".join(lines)
#export
def deal_error(cell):
    "Replace each error output by a plain-text `execute_result` holding `ename: evalue`"
    outputs = cell['outputs']
    for pos,out in enumerate(outputs):
        if out['output_type'] != 'error': continue
        text = split_max_len(f"{out['ename']}: {out['evalue']}", 81)
        outputs[pos] = nbformat.notebooknode.NotebookNode({
            'data': {'text/plain': text },
            'execution_count': None,
            'metadata': {},
            'output_type': 'execute_result'})
    return cell
test_eq(deal_error(code_cell('some code', outputs=[{'output_type': 'error', 'ename': 'Error name', 'evalue': 'This is an error.'}])),
code_cell('some code', outputs = [
{'data': {'text/plain': ' Error name: This is an error.'},
'execution_count': None,
'metadata': {},
'output_type': 'execute_result'}
]))
Remove interrupted progress bars from the outputs
#export
def remove_interrupted_pbars(cell):
    "Drop the outputs whose plain text contains `progress-bar-interrupted`"
    def _interrupted(out):
        if 'data' not in out or 'text/plain' not in out['data']: return False
        return 'progress-bar-interrupted' in out['data']['text/plain']
    cell['outputs'] = [out for out in cell['outputs'] if not _interrupted(out)]
    return cell
test_eq(remove_interrupted_pbars(
code_cell("some code", outputs = [{'a': 1}, {'data': {'text/plain': 'progress-bar-interrupted'}}, {'b': 2}])),
code_cell("some code", outputs = [{'a': 1}, {'b': 2}]))
Get metadata for outputs.
#export
def get_cell_meta(cell):
    "Move `#id`, `#caption`, `#alt` and `#width` flags from the cell source to its metadata"
    for attr in ("id", "caption", "alt", "width"):
        found = re.search(r'^\s*#\s*' + attr + r'\s(.*)$', cell["source"], re.MULTILINE)
        if found is None: continue
        cell["metadata"][attr] = found.groups()[0]
        # The flag is stripped from the source once recorded
        cell["source"] = re.sub(r'#\s*' + attr + r'\s.*?($|\n)', '', cell["source"])
    return cell
test_eq(get_cell_meta(code_cell("#id 123\n#caption This is a bear\nsome code")),
code_cell("some code", metadata = {'id': '123', 'caption': 'This is a bear'}))
Deal with table captions and refs
#export
def caption_tables(cell):
    """Add the `id` and `caption` of the cell metadata to an HTML table in the first output.

    The cell is returned unchanged when it has no outputs, when the first output has
    no `text/plain` data, or when that text does not start with a `<table ...>` tag.
    """
    if 'outputs' not in cell or len(cell['outputs']) == 0: return cell
    output = cell['outputs'][0]
    if 'data' not in output or 'text/plain' not in output['data']: return cell
    text = output['data']['text/plain']
    table_pat = r'^<\s*table\s+([^>]*>)'
    if re.search(table_pat, text) is None: return cell
    table_id = cell['metadata'].get('id', None)
    caption = cell['metadata'].get('caption', None)
    text_id = '' if table_id is None else f'id="{table_id}" '
    text_caption = '' if caption is None else f'\n <caption>{caption}</caption>'
    # A function replacement inserts the id/caption literally: in a template string
    # a backslash in the caption would be read as an escape or a group reference
    output['data']['text/plain'] = re.sub(table_pat, lambda m: '<table ' + text_id + m.group(1) + text_caption, text)
    return cell
cell = code_cell("some code",
metadata={'id': '123', 'caption': 'a caption'},
outputs=[{'data': {'text/plain': '<table border="1">\nTable code'}}])
cell2 = code_cell("some code",
metadata={'id': '123', 'caption': 'a caption'},
outputs=[{'data': {'text/plain': '<table id="123" border="1">\n <caption>a caption</caption>\nTable code'}}])
test_eq(caption_tables(cell), cell2)
cell = code_cell("#hide_input\n#id 123\n#caption a caption",
metadata={},
outputs=[{'data': {'text/plain': '<table border="1">\nTable code'}, 'output_type':''}])
Wrap text in outputs
#export
TEXT_MAX_WIDTH = 80
#export
def _wrap_output(output):
    "Wrap the text of `output` at `TEXT_MAX_WIDTH` columns; plain text starting with a table tag is left as is"
    def _fold(txt):
        # Each original line is wrapped on its own; continuation lines get the ' > ' marker
        rows = (textwrap.wrap(row, width=TEXT_MAX_WIDTH, subsequent_indent = ' > ') for row in txt.split('\n'))
        return '\n'.join('\n'.join(parts) for parts in rows)
    if 'text' in output:
        output['text'] = _fold(output['text'])
        return output
    has_plain = 'data' in output and 'text/plain' in output['data']
    if not has_plain: return output
    plain = output['data']['text/plain']
    if re.search(r'^<\s*table\s*([^>]*>)', plain) is None: output['data']['text/plain'] = _fold(plain)
    return output
#export
def wrap_text_outputs(cell):
    "Wrap the text of every output of `cell`"
    if 'outputs' in cell and len(cell['outputs']) > 0:
        cell['outputs'] = list(map(_wrap_output, cell['outputs']))
    return cell
cell = code_cell("some code",
metadata={},
outputs=[{'data': {'text/plain': 'This is a long output'*5}, 'output_type':''},
{'text': 'This is a long output'*5}])
wrapped = 'This is a long outputThis is a long outputThis is a long outputThis is a long\n > outputThis is a long output'
test_eq(wrap_text_outputs(cell), code_cell("some code",
metadata={},
outputs=[{'data': {'text/plain': wrapped}, 'output_type':''},
{'text': wrapped}]))
Test code length
#export
CODE_MAX_LEN = 80
#export
def check_code_len(cell):
    "Warn once for every source line longer than `CODE_MAX_LEN` characters; the cell is returned as is"
    src = cell['source']
    for row in src.split('\n'):
        if len(row) <= CODE_MAX_LEN: continue
        warn(f"Found code too long in a cell:\n{src}")
    return cell
Replace "` `" by ``
#export
def deal_quotes(cell):
    "Remove double quotes around inline code and replace single quotes by the `xxsinglequote` token"
    unquoted = re.sub(r'"`([^`]*)`"', r'`\1`', cell['source'])
    cell['source'] = re.sub(r"'", r'xxsinglequote', unquoted)
    return cell
test_eq(deal_quotes(markdown_cell('"`code`"')), markdown_cell('`code`'))
test_eq(deal_quotes(markdown_cell('a"b"c')), markdown_cell('a"b"c'))
test_eq(deal_quotes(markdown_cell("a'b'c")), markdown_cell('axxsinglequotebxxsinglequotec'))
Add one title level to every Markdown cell
#export
def add_title_level(cell):
    "Prepend one `#` to a cell whose source starts with a `#`"
    src = cell['source']
    if src[:1] == '#': cell['source'] = '#' + src
    return cell
test_eq(add_title_level(markdown_cell('# title')), markdown_cell('## title'))
Remove digits from numbered lists and format labeled lists
#export
def deal_with_lists(cell):
    "Strip the digits of numbered list items and reformat `- label:: text` items, line by line"
    def _fmt(row):
        row = re.sub(r'(^\s*)\d*\.(.*)$', r'\1.\2xxnewl', row)
        return re.sub(r'(^\s*)-\s(.*::)\s(.*)$', r'\2xxnewls\3xxnewl', row)
    cell['source'] = '\n'.join(_fmt(row) for row in cell['source'].split('\n'))
    return cell
test_eq(deal_with_lists(markdown_cell(" 1. Item\n 2. Item")),
markdown_cell(" . Itemxxnewl\n . Itemxxnewl"))
test_eq(deal_with_lists(markdown_cell("- lbl1:: item1\n- lbl2:: item2")),
markdown_cell("lbl1::xxnewlsitem1xxnewl\nlbl2::xxnewlsitem2xxnewl"))
Catch block quotes and put them in asciidoc blocks
#export
_re_block_notes = re.compile(r"""
# Catches any pattern > Title: content with title in group 1 and content in group 2
^\s*>\s* # > followed by any number of whitespace
([^:]*) # Catching group for any character but :
:\s* # : then any number of whitespace
([^\n]*) # Catching group for anything but a new line character
(?:\n|$) # Non-catching group for either a new line or the end of the text
""", re.VERBOSE | re.MULTILINE)
# Finds a `>` block quote that contains no colon, which `_re_block_notes` cannot match.
# Raw string: `\s` is a regex class, not a valid Python string escape.
_re_forgot_column = re.compile(r"^\s*>[^:]*$", re.MULTILINE)
Catch Markdown URLs of the form
[link](https://github.com/fastai)
inside asciidoc blocks. Asciidoc expects URLs to be in the following format:
[BLOCK_NAME]
====
This is a block with some https://github.com/fastai[link]
====
#export
# Markdown link `[text](url)`: text in group 1, url in group 2 (both matched lazily).
# Raw string: `\[` and `\(` are regex escapes, not valid Python string escapes.
_re_urls = re.compile(r"\[(.*?)\]\((.*?)\)")
#export
def replace_jekylls(cell):
    "Convert the `> type: text` block quotes of `cell` into fenced asciidoc blocks; warn on quotes without a colon"
    block_names = {'warning':'WARNING', 'note':'NOTE', 'important':'TIP', 'tip': 'TIP', 'stop': 'WARNING',
                   'jargon':'JARGON', 'question':'QUESTION', 'a': 'ALEXIS', 'j': 'JEREMY', 's': 'SYLVAIN'}
    speakers = ['ALEXIS', 'JEREMY', 'SYLVAIN']
    def _rep(m):
        typ,text = m.groups()
        # Markdown links are rewritten in the asciidoc `url[text]` form
        text = re.sub(_re_urls, r"\2[\1]", text)
        name = block_names.get(typ.lower(), typ.upper())
        # No type at all: plain quote block
        if len(name) == 0: return f"```asciidoc\n____\n{text}\n____\n```\n"
        # Regular admonition named after the type
        if name not in speakers + ['JARGON', 'QUESTION']: return f"```asciidoc\n[{name}]\n====\n{text}\n====\n```\n"
        # Titled blocks: rendered as NOTE, or TIP for the authors
        title,surro = name[0]+name[1:].lower(),'NOTE'
        if name=='JARGON':
            term,*rest = text.split(': ')
            title = f'{title}: {term}'
            text = re.sub(_re_urls, r"\2[\1]", ': '.join(rest))
        if name in speakers: title,surro = f"{title} says",'TIP'
        return f'```asciidoc\n.{title}\n[{surro}]\n====\n{text}\n====\n```\n'
    if _re_forgot_column.search(cell["source"]): warn("Found a non-processed block quote, please fix")
    cell["source"] = _re_block_notes.sub(_rep, cell["source"])
    return cell
test_eq(replace_jekylls(markdown_cell("text\n> : This is a block quote")),
markdown_cell("text\n```asciidoc\n____\nThis is a block quote\n____\n```\n"))
test_eq(replace_jekylls(markdown_cell("text\n> : This is a block quote with a [link](https://github.com/fastai)")),
markdown_cell("text\n```asciidoc\n____\nThis is a block quote with a https://github.com/fastai[link]\n____\n```\n"))
test_eq(replace_jekylls(markdown_cell("text\n> jargon: term: Some new term")),
markdown_cell('text\n```asciidoc\n.Jargon: term\n[NOTE]\n====\nSome new term\n====\n```\n'))
test_eq(replace_jekylls(markdown_cell("text\n> jargon: term: Some new term with a [link](https://github.com/fastai)")),
markdown_cell('text\n```asciidoc\n.Jargon: term\n[NOTE]\n====\nSome new term with a https://github.com/fastai[link]\n====\n```\n'))
test_warns(lambda: replace_jekylls(markdown_cell("text\n> This is a block quote")))
#export
_re_sidebar = re.compile(r'^\s*#\s*sidebar\s(.*)$', re.MULTILINE)
#export
def interpret_sidebar(cell):
    "Turn a cell whose first line is `#sidebar title` into a fenced asciidoc sidebar block"
    first_line,*rest = cell["source"].split("\n")
    found = _re_sidebar.search(first_line)
    if found is not None:
        title,body = found.groups()[0],"\n".join(rest)
        cell["source"] = f"```asciidoc\n.{title}\n****\n{body}\n****\n```\n"
    return cell
test = """#sidebar My intervention
This will be changed to a sidebar when converted in Asciidoc.
It can have several lines, contrary to a block quote."""
interpret_sidebar(markdown_cell(test))
{'cell_type': 'markdown', 'source': '```asciidoc\n.My intervention\n****\n\nThis will be changed to a sidebar when converted in Asciidoc.\n\nIt can have several lines, contrary to a block quote.\n****\n```\n', 'metadata': {}}
#export
_re_md_image = re.compile(r"^(<img\ [^>]*>)", re.MULTILINE)
#export
IMAGE_CONV_MULT = 0.6
#export
def process_images(cell):
    "Replace the `<img ...>` tags found at the start of a line by fenced asciidoc `image::` blocks"
    parse_attrs = HTMLParseAttrs()
    def _scaled(value): return str(int(IMAGE_CONV_MULT * int(value)))
    def _rep(m):
        d = parse_attrs(m.groups()[0])
        attrs = ['"' + d.get('alt', '') + '"']
        if 'width' in d:
            attrs.append(_scaled(d['width']))
            # The height is only emitted together with a width
            if 'height' in d: attrs.append(_scaled(d['height']))
        pid = f"[[{d['id']}]]\n" if 'id' in d else ""
        caption = f".{d['caption']}\n" if 'caption' in d else ""
        return f"```asciidoc\n{pid}{caption}image::{d['src']}[{', '.join(attrs)}]\n```"
    cell["source"] = _re_md_image.sub(_rep, cell["source"])
    return cell
txt = 'text\n<img alt="Alternative text" width="700" caption="This is an image" src="puppy.jpg" id="123"/>\nother text'
test_eq(process_images(markdown_cell(txt)),
markdown_cell('text\n```asciidoc\n[[123]]\n.This is an image\nimage::puppy.jpg["Alternative text", 420]\n```\nother text'))
#export
_re_reference = re.compile(r'<<([^>]*)>>')
#export
def wrap_references(cell):
    "Replace every `<<ref>>` by the tokens `xxref` + ref + `xxeref`"
    cell["source"] = _re_reference.sub(lambda m: f'xxref{m.group(1)}xxeref', cell["source"])
    return cell
test_eq(wrap_references(markdown_cell("There is a reference <<ref>> here.")),
markdown_cell("There is a reference xxrefrefxxeref here."))
#export
def extract_attachments(cell, dest):
    """Save the first attachment of `cell` as a file in `dest` and point the source at it.

    The payload is base64-decoded and written to the first free `att_NNNNN.<ext>`
    path in `dest`, `<ext>` being the subtype of the attachment mime type. The
    `attachments` entry is then removed and every `attachment:image.png`
    reference in the source is replaced by that path. Cells without attachments
    are returned unchanged.
    """
    if 'attachments' not in cell or not cell['attachments']: return cell
    # Only the first mime/payload pair of the first attachment is used
    mime,img = next(iter(next(iter(cell['attachments'].values())).items()))
    ext = mime.split('/')[1]
    for i in range(99999):
        p = dest/(f'att_{i:05d}.{ext}')
        if not p.exists(): break
    p.write_bytes(b64decode(img))
    del(cell['attachments'])
    # Literal replacement: with `re.sub` the `.` of the pattern matched any
    # character and backslashes in the path were read as template escapes
    cell['source'] = cell['source'].replace('attachment:image.png', str(p))
    return cell
Catch sidebars: sidebars are delimited by header cells like ### Sidebar: title
then ### End sidebar
#export
# Header such as `### Sidebar: Some title` (case-insensitive); group 1 holds the title
_re_sidebar_title = re.compile(r'#+\s+Sidebar:\s+(.*)$', re.IGNORECASE)
# Header such as `### End sidebar` (case-insensitive)
_re_end_sidebar = re.compile(r'#+\s+End sidebar', re.IGNORECASE)
_re_sidebar_title.search('### Sidebar: Tenacity in deep learning').groups()
('Tenacity in deep learning',)
#export
def sidebar_headers(cell):
    "Replace `Sidebar: title` and `End sidebar` headers by the fenced asciidoc sidebar delimiters"
    opened = _re_sidebar_title.sub(r'```asciidoc\n.\1\n****\n```', cell['source'])
    cell['source'] = _re_end_sidebar.sub(r'```asciidoc\n****\n```', opened)
    return cell
test_eq(sidebar_headers(markdown_cell("### Sidebar: My intervention")),
markdown_cell("```asciidoc\n.My intervention\n****\n```"))
test_eq(sidebar_headers(markdown_cell("### End sidebar")), markdown_cell("```asciidoc\n****\n```"))
#export
# Transforms that `treat_notebook` composes for code cells
code_cell_tfms = [get_cell_meta, replace_old_jekylls, hide_input, hide_output, extract_html, deal_error,
                  remove_interrupted_pbars, wrap_text_outputs, caption_tables, check_code_len]
# Transforms that `treat_notebook` composes for markdown cells, after `extract_attachments`
md_cell_tfms = [deal_quotes, wrap_references, interpret_sidebar, sidebar_headers, add_title_level, deal_with_lists,
                process_images, replace_jekylls]
Raw cells just need to have a new line added at the beginning
#export
def add_new_line(cell):
    "Prefix the source of `cell` with a new line"
    cell.update(source='\n' + cell['source'])
    return cell
#export
def treat_notebook(nb, dest):
    "Drop hidden cells, transform each remaining cell according to its type, then isolate asciidoc blocks"
    nb['cells'] = remove_hidden_cells(nb['cells'])
    code_tfm = compose(*code_cell_tfms)
    # Attachments of markdown cells are saved in `dest` before the other transforms
    md_tfm = compose(partial(extract_attachments, dest=dest), *md_cell_tfms)
    by_type = {'code': code_tfm, 'markdown': md_tfm, 'raw': add_new_line}
    nb['cells'] = [by_type[cell['cell_type']](cell) for cell in nb['cells']]
    nb['cells'] = isolate_adoc_blocks(nb['cells'])
    return nb
Replace special tokens by their values
#export
def rep_spec_tok(adoc, metadata=None):
    "Replace the `xxsinglequote`, `xxnewls` and `xxnewl` tokens of `adoc` by their values (`metadata` is unused)"
    adoc = re.sub('xxsinglequote', "'", adoc)
    adoc = re.sub('xxnewls', '\n ', adoc)
    # Raw string: `\s` is a regex class, not a valid Python string escape.
    # The whitespace character that follows the token is consumed as well.
    return re.sub(r'xxnewl\s', '\n', adoc)
nbconvert will flag the code cells with [ipython3]
, we replace this by [python]
#export
def ipython2python(adoc, metadata=None):
    "Rename the `[source, ipython3]` flags of `adoc` to `[source, python]` (`metadata` is unused)"
    return adoc.replace('[source, ipython3]', '[source, python]')
test_eq(ipython2python("[source, ipython3]\n----\nsome code\n----\n"), "[source, python]\n----\nsome code\n----\n")
Remove empty cells or cells flagged for removal (because of hide_input)
#export
def remove_cells(adoc, metadata=None):
    "Delete the python source blocks of `adoc` that are empty or hold only `##remove##` (`metadata` is unused)"
    opening,closing = r'\n\[source, python\]\n----',r'----\n'
    # Empty blocks first, then the blocks flagged for removal
    for body in (r'(\n)*', r'\n##remove##\n'):
        adoc = re.sub(opening + body + closing, '', adoc)
    return adoc
test_eq(remove_cells("lalala\n[source, python]\n----\n\n----\n"), "lalala")
test_eq(remove_cells("lalala\n[source, python]\n----\n##remove##\n----\n"), "lalala")
Clear code cells from the code flag when there is a ##clear##
tag.
#export
_re_clear = re.compile(r'\[source, python\]\n----\n##clear##(.*?)----\n', re.DOTALL)
def clear_cells(adoc, metadata=None):
    "Keep only the body of the python source blocks that start with `##clear##` (`metadata` is unused)"
    return _re_clear.sub(lambda m: m.group(1), adoc)
test_eq(clear_cells(
"lalala\n[source, python]\n----\n##clear##pure adoc\n----\nfoo\nbla\n[source, python]\n----\n##clear##pure adoc again\n----\nbli"),
"lalala\npure adoc\nfoo\nbla\npure adoc again\nbli")
Format LaTeX equations properly: they arrive either as latexmath:[$equation$]
or latexmath:[\[equation\]]
#export
def format_latex(adoc, metadata=None):
    "Rewrite inline `$...$` and display `\\[...\\]` latexmath macros of `adoc` (`metadata` is unused)"
    inline_pat = r"latexmath:\[\$([^\$]*)\$\]"
    display_pat = r"latexmath:\[\\\[(.*)\\\]\]"
    # Inline equations keep the macro, with \( \) delimiters
    adoc = re.sub(inline_pat, r"latexmath:[\\(\1\\)]", adoc)
    # Display equations become a passthrough block holding an equation environment
    return re.sub(display_pat, r"\n[latexmath]\n++++\n\\begin{equation}\n\1\n\\end{equation}\n++++\n", adoc)
test_eq(format_latex(r"latexmath:[$equation$]"), r"latexmath:[\(equation\)]")
test_eq(format_latex(r"latexmath:[\[equation\]]"),
"\n[latexmath]\n++++\n\\begin{equation}\nequation\n\\end{equation}\n++++\n")
Format image outputs and make sure they point to the right folder.
#export
_re_image_output = re.compile(r'----\n!\[(?:svg|png|jpg)\]\((.+)\)\n----')
#export
def format_outputs(adoc, metadata=None):
    "Replace image outputs by `image::` macros located in `metadata['folder']`, using the per-image metadata"
    meta = {} if metadata is None else metadata
    folder = meta.get('folder', '.')
    def _rep(m):
        name = m.groups()[0]
        d = meta[name] if name in meta else {}
        attrs = ['"' + d.get('alt', '') + '"']
        if 'width' in d:
            attrs.append(str(d['width']))
            # The height is only emitted together with a width
            if 'height' in d: attrs.append(str(d['height']))
        pid = f"[[{d['id']}]]\n" if 'id' in d else ""
        caption = f".{d['caption']}\n" if 'caption' in d else ""
        return f"{pid}{caption}image::{str(folder)}/{name}[{', '.join(attrs)}]"
    return _re_image_output.sub(_rep, adoc)
test_eq(format_outputs('----\n![svg](output.svg)\n----', {'folder':'path', 'output.svg': {'alt': 'alt'}}),
'image::path/output.svg["alt"]')
test_eq(format_outputs('----\n![svg](output.svg)\n----', {'folder':'path', 'output.svg': {'alt': 'alt', 'width': 100}}),
'image::path/output.svg["alt", 100]')
test_eq(format_outputs('----\n![png](output1.png)\n----'),
'image::./output1.png[""]')
Deal with quotes
#export
def fix_quotes(adoc, metadata=None):
    "Replace the ``...'' quoting of `adoc` by plain double quotes (`metadata` is unused)"
    return re.sub(r"``([^'`]*)''", lambda m: '"' + m.group(1) + '"', adoc)
test_eq(fix_quotes("``double quotes''"), '"double quotes"')
Put back << >> around refs
#export
def fix_references(adoc, metadata=None):
    "Put back `<<` `>>` around every reference wrapped in `xxref`/`xxeref` tokens (`metadata` is unused)"
    # Non-greedy group: several references on the same line are restored one by one
    return re.sub(r"xxref(.*?)xxeref", r"<<\1>>", adoc)
test_eq(fix_references("There is a reference xxrefrefxxeref here."), "There is a reference <<ref>> here.")
Format tables
#export
def format_tables(adoc, metadata=None):
    "Put the HTML tables of `adoc` between `++++` delimiters instead of `----` (`metadata` is unused)"
    chunks = adoc.split('----')
    # delims[i] is written before chunks[i]; the extra last entry is never emitted
    delims = [''] + ['----'] * (len(chunks) - 1) + ['']
    for pos,chunk in enumerate(chunks):
        chunk = re.sub(r'<div>[\s\S]*<table', '<table', chunk)
        for tag in ('</div>', '<p>', '</p>'): chunk = re.sub(tag, '', chunk)
        if len(chunk) > 0:
            if not chunk.startswith('\n'): chunk = '\n' + chunk
            if not chunk.endswith('\n'): chunk = chunk + '\n'
        if chunk.startswith('\n<table'):
            # The whole chunk is a table: switch both surrounding delimiters
            delims[pos] = delims[pos+1] = '++++'
        elif '<table' in chunk:
            # Table after some text: close the text block, then open a passthrough block
            begin = chunk.index('<table')
            chunk = chunk[:begin] + '\n----\n\n++++\n' + chunk[begin:]
            delims[pos+1] = '++++'
        chunks[pos] = chunk
    res = ''.join(delim + chunk for delim,chunk in zip(delims, chunks))
    return res.replace('\n\n--------', '')
Just as a personal preference, replace all blocks of four new lines or more by \n\n\n
#export
def remove_lines(text, metadata=None):
    "Shrink every run of four or more new lines followed by a character to three new lines (`metadata` is unused)"
    return re.sub(r'\n\n\n\n+([^\n])', lambda m: '\n\n\n' + m.group(1), text)
test_eq(remove_lines('a\n\n\n\n\n\nb'), 'a\n\n\nb')
All together
#export
# Text transforms that `post_process` composes on the exported asciidoc
post_process_tfms = [fix_quotes, rep_spec_tok, ipython2python, remove_cells, clear_cells, format_latex,
                     format_outputs, fix_references, format_tables, remove_lines]
#export
def post_process(adoc, metadata=None):
    "Apply all `post_process_tfms` to the exported `adoc` text (passing them `metadata`) and strip the result"
    if not adoc.startswith('\n'): adoc = '\n' + adoc
    # Raw string: `\s` is a regex class, not a valid Python string escape
    adoc = re.sub(r'xxnewl\s', '\n', adoc)
    adoc = compose(*post_process_tfms)(adoc, metadata=metadata)
    return adoc.strip()
#export
# Module-level asciidoc exporter used by `convert_nb`; input and output prompts are excluded
c = ExportConfig()
exporter = ASCIIDocExporter(c)
exporter.exclude_input_prompt=True
exporter.exclude_output_prompt=True
#export
def add_metadata(nb):
    "Add a default Python `language_info` to the notebook metadata when it is missing (e.g. after stripping)"
    default_info = {
        'codemirror_mode': {'name': 'ipython', 'version': 3},
        'file_extension': '.py',
        'mimetype': 'text/x-python',
        'name': 'python',
        'nbconvert_exporter': 'python',
        'pygments_lexer': 'ipython3',
        'version': '3.7.1'}
    nb['metadata'].setdefault('language_info', default_info)
    return nb
#export
def output_num(n):
    "Return the number found in a name of the form `output_<number>_...`, or `None` for any other name"
    found = re.match(r'output_(\d*)_', n)
    return None if found is None else int(found.group(1))
test_eq(output_num('output_31_0.png'), 31)
test_eq(output_num('output_12_0.svg'), 12)
#export
import PIL
#export
IMAGE_OUT_MULT = 0.8
#export
import xml.etree.ElementTree as ET
#export
def get_output_width(name, raw, folder):
    """Return the width of the output image `name`, or `None` when it cannot be read.

    For `.svg` names the width is taken from the `width` attribute of the root
    element of `raw` (decimals and the `pt` unit dropped) and returned as a string.
    Other images are opened with PIL from `folder/name` and the pixel width is returned.
    """
    if name.endswith('.svg'): return ET.fromstring(raw).attrib['width'].split('.')[0].replace('pt', '')
    try: return PIL.Image.open(Path(folder)/name).size[0]
    # Best effort for unreadable images, without swallowing KeyboardInterrupt/SystemExit
    except Exception: return None
#export
def convert_nb(fname, dest_path='.', folder=None):
    """Convert a notebook `fname` to an asciidoc file in `dest_path`.

    The image outputs are written in `folder` (default: `<notebook stem>_files`
    inside `dest_path`), which is emptied first if it exists. `folder` has to be
    located inside `dest_path`, since it is referenced relatively to it.
    """
    print(f"Converting {fname}")
    fname,dest_path = Path(fname),Path(dest_path)
    dest_name = fname.with_suffix('.asciidoc').name
    # A str is accepted as well as a Path for the images folder
    folder = dest_path/f'{fname.stem}_files' if folder is None else Path(folder)
    #folder for images. Clear if exists
    if folder.exists(): shutil.rmtree(folder)
    os.makedirs(folder, exist_ok=True)
    nb = add_metadata(treat_notebook(read_nb(fname), folder))
    export = exporter.from_notebook_node(nb)
    outputs = export[1]['outputs']
    metadata = {'folder': folder.relative_to(dest_path)}
    metadata.update({n: nb["cells"][output_num(n)]['metadata'] for n in outputs.keys() if output_num(n) is not None})
    for n,o in outputs.items():
        with open(folder/n, 'wb') as f: f.write(o)
        # Outputs whose name holds no cell number got no entry above
        meta = metadata.setdefault(n, {})
        w = meta['width'] if 'width' in meta else get_output_width(n, o, folder)
        if w is not None: meta['width'] = str(int(IMAGE_OUT_MULT * int(w)))
    with open(dest_path/dest_name,'w', encoding="utf8") as f:
        f.write(post_process(export[0], metadata))
dest = Path('test')
convert_nb('test/_test.ipynb', dest)
Converting test/_test.ipynb
#convert_nb('test/_test.ipynb', Path('test'))
#export
def _copy_images(path, dest_path):
    "Recursively copy the files and sub-folders of `path` into `dest_path`, created if needed"
    os.makedirs(dest_path, exist_ok=True)
    for entry in path.iterdir():
        target = dest_path/entry.name
        if entry.is_file(): shutil.copy(entry, target)
        if entry.is_dir(): _copy_images(entry, target)
#export
def copy_images(path, dest_path):
    "Replace `dest_path/images` by a fresh copy of `path/images`"
    target = dest_path/"images"
    if target.exists(): shutil.rmtree(target)
    _copy_images(path/"images", target)
dest = Path('..')/'convert_book'
# copy_images(Path('book'), dest)
#export
def _convert1(fname, dest_path='.'):
    "Convert one notebook, printing any error instead of raising it"
    try: convert_nb(fname, dest_path=dest_path)
    except Exception as err: print(f"Error in notebook {fname}\n{err}")
#export
# Command-line entry point: converts every notebook of `path` whose name does not
# start with `_`, then copies the `.adoc`/`.asciidoc` files and the `images` folder
# of `path` to `dest_path`.
@call_parse
def fastdoc_convert_all(
    path:str='book', # Path to notebooks
    dest_path:str='../convert_book' # Path to generated asciidoc files
):
    path,dest_path = Path(path),Path(dest_path)
    dest_path.mkdir(parents=True,exist_ok=True)
    # `copy_images` iterates over `path/images`, so it is created when missing
    (path/'images').mkdir(parents=True,exist_ok=True)
    nbs = [f for f in path.iterdir() if f.suffix == '.ipynb' and not f.name.startswith('_')]
    parallel(_convert1, nbs, dest_path=dest_path)
    for f in path.iterdir():
        if f.suffix in ['.adoc', '.asciidoc']: shutil.copy(f, dest_path/f.name)
    copy_images(path, dest_path)
#convert_all()
from nbdev.export import *
notebook2script()
Converted 00_asciidoc.ipynb. Converted 01_clean.ipynb. Converted index.ipynb.