#!/usr/bin/env python
# coding: utf-8

# # Panama Papers Explorer - py2neo
#
# This notebook provides a quick start for using py2neo with the Panama Papers
# neo4j docker container
# [ryguyrg/neo4j-panama-papers](https://hub.docker.com/r/ryguyrg/neo4j-panama-papers/)
# [[Github](https://github.com/ryguyrg/panama-neo4j)].
#
# The following `docker-compose.yml` file will launch a Jupyter notebook
# environment linked to the neo4j container holding the Panama Papers data.
# ````
# #Run with:
# #docker-compose up
# #Then visit IP_ADDRESS:7474
# #The default credentials are neo4j/neo4j
# #You will be prompted for a new password - I used: panamapapers
#
# #Get the Panama Papers neo4j container
# neo4j:
#   image: ryguyrg/neo4j-panama-papers
#   ports:
#     - "7474:7474"
#     - "1337:1337"
#   volumes:
#     - /opt/data
#
# #Download a Jupyter notebook environment and link the neo4j container to it
# jupyterscipy:
#   image: jupyter/scipy-notebook
#   ports:
#     - "8890:8888"
#   links:
#     - neo4j:neo4j
#   volumes:
#     - ./notebooks:/home/jovyan/work
#
# ##If you prefer an RStudio environment:
# #rstudio:
# #  image: rocker/rstudio
# #  ports:
# #    - "8787:8787"
# #  links:
# #    - neo4j:neo4j
# #  volumes:
# #    - ./rstudio:/home/rstudio
# ##Then install: install.packages('RNeo4j', repos="http://cran.rstudio.com/")
# ````

# ## py2neo
#
# `py2neo` provides a Python wrapper for neo4j.

# In[1]:


# Install py2neo into the notebook's environment (IPython shell escape),
# then import the Graph entry point.
get_ipython().system('pip3 install py2neo')
from py2neo import Graph


# Create a connection to the `neo4j` database, using the `neo4j` user account
# and the new password chosen at first login.

# In[2]:


graph = Graph(
    "http://neo4j:7474/db/data/",
    user='neo4j',
    password='panamapapers',
)


# Start to explore the graph - which node labels (node types) exist?

# In[3]:


graph.node_labels


# For each node label, print the label followed by every entry the schema
# reports from get_indexes() for it (presumably the indexed property keys).

# In[4]:


for label in graph.node_labels:
    print(label)
    for indexed_key in graph.schema.get_indexes(label):
        print('\t{}'.format(indexed_key))


# Which relationship types exist?

# In[5]:


graph.relationship_types


# ## Officer Searches
#
# Some simple searches on Officers.
# In[62]:


# Find officers whose name contains the supplied fragment (CONTAINS is a
# case-sensitive substring match) and print up to 10 matching names.
# NOTE: throughout this section values are interpolated directly into the
# Cypher text, so a value containing a single quote would break the query.
name='Smith'
for r in graph.run("MATCH (a:Officer) WHERE a.name CONTAINS '{}' RETURN a.name LIMIT 10".format(name)):
    print(r)


# In[99]:


# Return the full record for officers whose name exactly matches the supplied
# name. The match is made case insensitive by upper-casing both sides, so the
# exact name (not the partial `name` fragment above) must be the one compared.
exact_name='David Smith'
for r in graph.run("MATCH (a:Officer) WHERE UPPER(a.name) ='{}' RETURN a LIMIT 10".format(exact_name.upper())):
    print(r)


# In[63]:


# Fetch a single Officer node whose `name` property equals exact_name.
p=graph.find_one('Officer','name',exact_name)
p


# In[80]:


# Find what things a person is connected to at the node level, with the
# person as the start node of each relationship.
for c in graph.match(start_node=p):
    print(c)
    print(c.start_node())
    print(c.type())
    print(c.end_node())
    print('----')


# In[43]:


# Which entities is the person associated with, for a specified relationship
# type (here SHAREHOLDER_OF, in either direction)? The officer is selected by
# exact name, so the same exact_name used to look up `p` is supplied.
q='''
MATCH (o:Officer), (e:Entity)
WHERE o.name='{}' AND (o)-[:SHAREHOLDER_OF]-(e)
RETURN o,e LIMIT 10
'''.format(exact_name)

for r in graph.run(q):
    print(r)


# In[46]:


# Fetch a single Entity node by its exact name.
exact_entity_name='PORTONES DEL MAR INVESTORS INC.'
e=graph.find_one('Entity','name',exact_entity_name)
e


# In[54]:


# Find what things a company is connected to at the node level - the company
# is the "end_node" of each relationship; show at most 10.
for c in graph.match(end_node=e,limit=10):
    print(c)


# In[105]:


# Look the same entity up again through its internal_id property.
iid=e['internal_id']
for r in graph.run("MATCH (a:Entity) WHERE a.internal_id ='{}' RETURN a LIMIT 10".format(iid)):
    print(r)


# In[ ]: