# Please see https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Basics.html for introduction
import ipywidgets as widgets
from IPython.display import display, SVG
from tempfile import TemporaryDirectory
# so that we can live-edit Python modules
%load_ext autoreload
%autoreload 2
Some repositories to try:
Three repositories out of the 10 most recent eLife papers:
from retrieval import find_repository_urls, fetch_repository
from pathlib import Path
from nbpipeline.visualization.static_graph import static_graph
from nbpipeline.graph import RulesGraph
from nbpipeline.rules import Rule, Group, NotebookRule, is_tracked_in_version_control
# from nbpipeline.rules import discover_notebooks
from os import system, walk, sep
def discover_notebooks(
    root_path=Path('.'), ignore=None, ignored_dirs=None, only_tracked_in_git=False,
    ignore_prefixes=('__', '.')
):
    """Walk `root_path` and create a `NotebookRule` for every notebook found.

    Useful when working with input/output auto-detection.

    Args:
        root_path: directory to search; a `Path` or a string.
        ignore: notebook paths to skip, given either as absolute paths or
            relative to `root_path`.
        ignored_dirs: directory names to skip together with everything
            below them; hidden directories (leading '.') are always skipped.
        only_tracked_in_git: when true, skip notebooks for which
            `is_tracked_in_version_control` returns a falsy value.
        ignore_prefixes: file-name prefixes marking notebooks to skip.

    Returns:
        A dict with two keys: 'rules' (the list of created rules) and
        'groups' (`Group` objects keyed by the notebook's directory,
        expressed relative to `root_path`).
    """
    ignored_dirs = ignored_dirs or set()
    ignore = ignore or set()
    # `str.startswith` accepts a tuple, so normalise once instead of looping.
    ignore_prefixes = tuple(ignore_prefixes)
    names = {}
    rules = []
    groups: dict[str, Group] = {}
    root_path = Path(root_path).absolute()
    print(root_path)
    for dirpath, subdirs, files in walk(root_path):
        # Prune in place so that `walk` never descends into hidden or ignored
        # directories. Only directories *below* the root are filtered: the
        # location of the root itself (e.g. under `~/.cache`) must not
        # influence which notebooks are discovered.
        subdirs[:] = [
            subdir for subdir in subdirs
            if not subdir.startswith('.') and subdir not in ignored_dirs
        ]
        # Directory components relative to the root; empty for the root itself.
        dirs = list(Path(dirpath).relative_to(root_path).parts)
        for file in files:
            if file.startswith(ignore_prefixes):
                continue
            if not file.endswith('.ipynb'):
                continue
            path = Path(dirpath) / file
            # The bare file name cannot identify a notebook in a nested
            # directory, so the full path is checked.
            if only_tracked_in_git and not is_tracked_in_version_control(str(path)):
                continue
            relative_path = Path(*dirs) / file
            if str(path) in ignore or str(relative_path) in ignore:
                continue
            stem = file[:-len('.ipynb')]
            # Underscores become spaces, except a leading one which is kept;
            # slicing (rather than indexing) tolerates an empty stem.
            name = stem[:1] + stem[1:].replace('_', ' ')
            if name in names:
                # Duplicates are reported but still turned into a rule below.
                print(name, 'already registered', path, names[name])
            else:
                names[name] = path
            group_id = sep.join(dirs) if dirs else None
            try:
                rule = NotebookRule(name, notebook=path, group=group_id)
            except Exception as e:
                # Best effort: one notebook that cannot be turned into a rule
                # should not abort discovery of the remaining ones.
                print(e)
                continue
            rules.append(rule)
            if group_id and group_id not in groups:
                groups[group_id] = Group(id=group_id, name=dirs[-1], parent=sep.join(dirs[:-1]))
    return {
        'rules': rules,
        'groups': groups
    }
!pip install data_vault
Collecting data_vault Downloading data_vault-0.4.4-py3-none-any.whl (17 kB) Requirement already satisfied: IPython in /srv/conda/envs/notebook/lib/python3.9/site-packages (from data_vault) (7.27.0) Requirement already satisfied: pandas in /srv/conda/envs/notebook/lib/python3.9/site-packages (from data_vault) (1.3.3) Requirement already satisfied: matplotlib-inline in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.1.3) Requirement already satisfied: pickleshare in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.7.5) Requirement already satisfied: traitlets>=4.2 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (5.1.0) Requirement already satisfied: decorator in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (5.1.0) Requirement already satisfied: jedi>=0.16 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.18.0) Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (3.0.20) Requirement already satisfied: backcall in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.2.0) Requirement already satisfied: setuptools>=18.5 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (58.0.4) Requirement already satisfied: pexpect>4.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (4.8.0) Requirement already satisfied: pygments in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (2.10.0) Requirement already satisfied: parso<0.9.0,>=0.8.0 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from jedi>=0.16->IPython->data_vault) (0.8.2) Requirement already satisfied: ptyprocess>=0.5 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pexpect>4.3->IPython->data_vault) 
(0.7.0) Requirement already satisfied: wcwidth in /srv/conda/envs/notebook/lib/python3.9/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->IPython->data_vault) (0.2.5) Requirement already satisfied: pytz>=2017.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pandas->data_vault) (2021.1) Requirement already satisfied: numpy>=1.17.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pandas->data_vault) (1.21.2) Requirement already satisfied: python-dateutil>=2.7.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pandas->data_vault) (2.8.2) Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas->data_vault) (1.16.0) Installing collected packages: data-vault Successfully installed data-vault-0.4.4
def generate_dag(repository_url):
    """Render the currently registered rules as a left-to-right graph.

    Every rule in `Rule.rules` is tagged with `repository_url` before the
    graph is built.

    Args:
        repository_url: value stored on each rule as `repository_url`.

    Returns:
        Whatever `static_graph` produces for the rules graph (the caller
        passes it to `SVG(data=...)`, so presumably SVG markup).
    """
    work_dir = Path('/tmp/nbpipeline')
    cache = Path('/tmp/nbpipeline/cache')
    Rule.setup(tmp_dir=work_dir, cache_dir=cache)
    # The directories are created only after `Rule.setup` has been configured.
    for directory in (work_dir, cache):
        directory.mkdir(exist_ok=True, parents=True)

    registered = Rule.rules
    # Rule.pipeline_config = self
    for registered_rule in registered.values():
        registered_rule.repository_url = repository_url

    return static_graph(
        RulesGraph(registered).graph,
        options='{"graph": {"rankdir": "LR"}}'
    )
# Text box pre-filled with an example repository address.
repository_field = widgets.Text(
    value='https://github.com/krassowski/multi-omics-state-of-the-field'
)
# Button that starts the analysis, and the area that collects printed output.
analyze_button = widgets.Button(description="Analyze")
output = widgets.Output()
display(repository_field, analyze_button, output)
def on_download_button_clicked(b):
    """Handle a click: resolve the repository address and draw its notebook graph.

    Everything printed or displayed here is captured by the `output` widget.
    The handler resets `Rule.rules`, runs notebook discovery on the working
    directory and displays the graph returned by `generate_dag`.

    Args:
        b: the clicked button (unused).
    """
    with output:
        if not repository_field.value:
            print('Please provide a repository address')
            return
        candidates = find_repository_urls(repository_field.value)
        candidate_count = len(candidates)
        if candidate_count == 0:
            # Without this guard `next(iter(candidates))` below would raise
            # StopIteration inside the widget callback.
            print('No repository address found')
            return
        if candidate_count > 1:
            # TODO: offer choice
            print('More than one candidate address found')
        # for now just take the first one
        address = next(iter(candidates))
        # NOTE(review): downloading is currently disabled; a fixed relative
        # directory is analysed instead of a fresh temporary checkout.
        # with TemporaryDirectory() as temp_dir:
        temp_dir = 'tmp/test'
        print(f"Downloading: {address}")
        # fetch_repository(address=address, temp_dir=temp_dir)
        print('Downloaded')
        # Start from an empty registry so that rules from a previous click
        # do not end up in the new graph.
        Rule.rules = {}
        discover_notebooks(
            root_path=Path(temp_dir),
            # ignored_dirs={'backlog', 'archive'},
            # ignore={'notebook_setup.ipynb'},
            # only_tracked_in_git=True
        )
        svg_graph = generate_dag(repository_url=address)
        display(SVG(data=svg_graph))
# Run the handler each time the "Analyze" button is clicked.
analyze_button.on_click(on_download_button_clicked)
Text(value='https://github.com/krassowski/multi-omics-state-of-the-field')
Button(description='Analyze', style=ButtonStyle())
Output()