#!/usr/bin/env python # coding: utf-8 # # Contentmine IPython Magic # # *A doodle by Tony Hirst / @psychemedia* # # This is a first, very weak, attempt at putting together some `contentmine` IPython magics. # # The magics are based on the following conditions: # # - an IPython notebook running in a Docker container in privileged mode using Python 3.3+ and with a specified volume mounted in the container (in the example, I use `/notebooks`); # - the existence of a public Docker image `psychemedia/contentmine` containing the *contentmine* applications: `getpapers`, `norma`, `cmine`. # # There are two ideas at the heart of the demo: # # 1. that we can run commands in Docker containers as commandline commands and get any results files back via a shared folder; # 2. that we can run Docker containers from inside a container (for example, as a commandline command from a code cell in a Jupyter notebook running in a container). # # # As an example, this notebook was run in a container fired up from the following `docker-compose.yaml` file launched with the command `docker-compose up -d`: # ```` # notebook: # image: jupyter/notebook # ports: # - "8899:8888" # volumes: # - ./notebooks:/notebooks # - /var/run/docker.sock:/var/run/docker.sock # privileged: true # ```` # State is passed between the command line Docker container and the notebook container by mounting a specified directory in the command line container on top of a specified directory in the notebook container. Files persist in the notebook container directory; the temporary command line container can write files to, and read files from, this directory and its subdirectories.
#
# ----
#
# Install the magics:

# In[1]:

from IPython.core.magic import Magics, magics_class, line_magic
from IPython.core.magic_arguments import (argument, magic_arguments, parse_argstring)
import shutil
import shlex
import os

# The Docker API bindings are installed on the fly, so the import has to come
# after the install command rather than with the other imports.
get_ipython().system('pip3 install docker-py')
import docker

# Should do this as part of init
if not shutil.which("docker"):
    get_ipython().system('apt-get update && apt-get install -y docker.io')


@magics_class
class DockerMagics(Magics):
    """Line magics that run the contentmine tools in a throwaway container.

    Each magic takes ``/MOUNTDIR PROJECT`` where ``/MOUNTDIR`` is a directory
    of the notebook container that is backed by a host volume. Results are
    exchanged through ``<host path of MOUNTDIR>/contentmineMagic``, which is
    mounted into the tool container as ``/tmp_contentmineMagic``.
    """

    def dockerMagicGetPath(self, mountdir):
        """Return the host path mounted at ``mountdir`` in this container.

        The container running the notebook is looked up through the Docker
        socket using the ``HOSTNAME`` environment variable as an id filter.

        Returns:
            The ``Source`` of the mount whose ``Destination`` equals
            ``mountdir``, or ``''`` when ``HOSTNAME`` is unset, the container
            is not found, or nothing is mounted at ``mountdir``.
        """
        container_id = os.environ.get('HOSTNAME')
        if not container_id:
            return ''

        # docker-py exposes the low-level client as ``Client``; later releases
        # of the Docker SDK renamed it to ``APIClient``.
        client_cls = getattr(docker, 'APIClient', None) or docker.Client
        cli = client_cls(base_url='unix://var/run/docker.sock')

        containers = cli.containers(filters={'id': container_id})
        if not containers:
            return ''

        for mount in containers[0].get('Mounts', []):
            if mount.get('Destination') == mountdir:
                return mount.get('Source', '')
        return ''

    def _contentmine_args(self, line, magic_name):
        """Parse ``/MOUNTDIR PROJECT`` into shell-safe ``docker run`` pieces.

        Returns:
            ``(volume, project)`` where ``volume`` is the quoted ``-v``
            argument and ``project`` is the quoted project/search term, or
            ``None`` (after printing a message) when the line is incomplete
            or ``/MOUNTDIR`` is not a mounted volume.
        """
        parts = line.split()
        if len(parts) < 2:
            print('Usage: %{} /MOUNTDIR SEARCHTERM'.format(magic_name))
            return None

        mount = self.dockerMagicGetPath(parts[0])
        if mount == '':
            print('No container mounted there?')
            return None

        # Quote everything that ends up on the shell command line; the host
        # path may contain spaces and the search term is free text.
        volume = shlex.quote(
            '{}{}:/tmp_contentmineMagic'.format(mount, '/contentmineMagic'))
        project = shlex.quote(' '.join(parts[1:]))
        return volume, project

    @line_magic
    def getpapers(self, line):
        """
        Runs a contentmine command: /MOUNTDIR SEARCHTERM

        %getpapers /notebooks rhinocerous

        Equivalent to:
            getpapers -q rhinocerous -o /contentmine/rhinocerous -x
        """
        args = self._contentmine_args(line, 'getpapers')
        if args is None:
            return
        volume, project = args
        # IPython expands the {name} placeholders from this method's locals,
        # so the command must stay a plain (non-formatted) string literal.
        get_ipython().system('docker run --rm -v {volume} --tty --interactive psychemedia/contentmine getpapers -q {project} -o /tmp_contentmineMagic/{project} -x')

    @line_magic
    def norma(self, line):
        """
        Converts the downloaded full text of a project to scholarly HTML.

        %norma /notebooks rhinocerous

        Equivalent to:
            norma --project /contentmine/aardvark -i fulltext.xml -o scholarly.html --transform nlm2html
        """
        args = self._contentmine_args(line, 'norma')
        if args is None:
            return
        volume, project = args
        # {volume} and {project} are expanded by IPython from the locals above.
        get_ipython().system('docker run --rm -v {volume} --tty --interactive psychemedia/contentmine norma --project /tmp_contentmineMagic/{project} -i fulltext.xml -o scholarly.html --transform nlm2html')

    @line_magic
    def cmine(self, line):
        """
        Runs cmine over a project directory.

        %cmine /notebooks rhinocerous

        Equivalent to:
            ./contentmine cmine /contentmine/aardvark
        """
        args = self._contentmine_args(line, 'cmine')
        if args is None:
            return
        volume, project = args
        # {volume} and {project} are expanded by IPython from the locals above.
        get_ipython().system('docker run --rm -v {volume} --tty --interactive psychemedia/contentmine cmine /tmp_contentmineMagic/{project}')


# In[2]:

ip = get_ipython()
ip.register_magics(DockerMagics)


# ----
#
# Now for a demo...

# In[3]:

get_ipython().system('rm -r contentmineMagic/')
get_ipython().system('ls')


# In[4]:

get_ipython().run_line_magic('getpapers', '/notebooks rhinocerous')


# In[5]:

get_ipython().run_line_magic('norma', '/notebooks rhinocerous')


# In[6]:

get_ipython().run_line_magic('cmine', '/notebooks rhinocerous')


# In[7]:

get_ipython().system('ls')


# In[8]:

get_ipython().system('ls contentmineMagic/')


# In[9]:

get_ipython().system('ls contentmineMagic/rhinocerous/')


# ## Where Next?
#
# Setting up the shared directories is a bit of a fudge - is there a better way?
#
# The magics need to be better defined, allowing for the passing of appropriate command line switches, e.g. in `getpapers`, via [`core.magic_arguments`](http://ipython.readthedocs.io/en/stable/api/generated/IPython.core.magic_arguments.html?), for example.
#
# Need to consider cell magics so we can write a pipeline along the lines of something like:
#
#     %%contentmine /notebooks rhinocerous
#     getpapers
#     norma
#     cmine
#
# A proper install package needs putting together.
# # The magics need generalising up to a generic `docker magic`, and then perhaps back down to magics for a particular application? # # More info: [Defining custom magics](http://ipython.readthedocs.io/en/stable/config/custommagics.html)