This notebook provides a quick start for using py2neo with the Panama Papers neo4j docker container ryguyrg/neo4j-panama-papers [Github].
The following docker-compose.yml
file will launch a Jupyter notebook environment linked to the neo4j container holding the Panama Papers data.
# Usage:
#   docker-compose up
# Then visit IP_ADDRESS:7474
# The default credentials are neo4j/neo4j
# You will be prompted for a new password - I used: panamapapers

# Panama Papers neo4j container
neo4j:
  image: ryguyrg/neo4j-panama-papers
  ports:
    - "7474:7474"
    - "1337:1337"
  volumes:
    - /opt/data

# Jupyter notebook environment, linked to the neo4j container
jupyterscipy:
  image: jupyter/scipy-notebook
  ports:
    - "8890:8888"
  links:
    - neo4j:neo4j
  volumes:
    - ./notebooks:/home/jovyan/work

## If you prefer an RStudio environment:
#rstudio:
#  image: rocker/rstudio
#  ports:
#    - "8787:8787"
#  links:
#    - neo4j:neo4j
#  volumes:
#    - ./rstudio:/home/rstudio
## Then install: install.packages('RNeo4j', repos="http://cran.rstudio.com/")
py2neo
provides a Python wrapper for neo4j.
!pip3 install py2neo
import os

from py2neo import Graph
Requirement already satisfied (use --upgrade to upgrade): py2neo in /opt/conda/lib/python3.5/site-packages
You are using pip version 8.1.1, however version 8.1.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
Create a connection to the neo4j
database, using the neo4j
user account and your new password:
# Connect to the neo4j container over HTTP; "neo4j" is the hostname exposed by
# the docker-compose link.
# The password is taken from the NEO4J_PASSWORD environment variable when it is
# set, so it does not have to be stored in the notebook; otherwise the password
# chosen during setup ('panamapapers') is used.
graph = Graph(
    "http://neo4j:7474/db/data/",
    user='neo4j',
    password=os.environ.get('NEO4J_PASSWORD', 'panamapapers'),
)
Start to explore the graph - what node types are there?
# Set of node labels present in the database (shown as the cell output).
graph.node_labels
frozenset({'Address', 'Entity', 'Intermediary', 'Officer'})
For each node type (label), which properties are indexed?
# Print each node label, followed by one tab-indented line per entry that
# graph.schema.get_indexes() reports for that label.
for label in graph.node_labels:
    print(label)
    for indexed_key in graph.schema.get_indexes(label):
        print('\t{}'.format(indexed_key))
Intermediary name sourceID internal_id address valid_until country_codes countries status Address address icij_id valid_until countries country_code Officer name address icij_id valid_until countries country_code Entity name original_name address incorporation_date inactivation_date struck_off_date company_type service_provider jurisdiction status sourceID
What relationship types are there?
# Set of relationship types present in the database (shown as the cell output).
graph.relationship_types
frozenset({'ALTERNATE_DIRECTOR_OF', 'APPOINTOR_OF', 'ASSISTANT_SECRETARY_OF', 'AUDITOR_OF', 'AUTHORISED_PERSON_SIGNATORY_OF', 'AUTHORIZED_SIGNATORY_OF', 'AUTH_REPRESENTATIVE_OF', 'BANK_SIGNATORY_OF', 'BENEFICIAL_OWNER_OF', 'BENEFICIARY_OF', 'BENEFICIARY_SHAREHOLDER_AND_DIRECTOR_OF', 'BOARD_REPRESENTATIVE_OF', 'CHAIRMAN_OF', 'CONNECTED_OF', 'CORRESPONDENT_ADDR_OF', 'COTRUSTEE_OF_TRUST_OF', 'CUSTODIAN_OF', 'DIRECTOR_AND_SHAREHOLDER_OF', 'DIRECTOR_BENEFICIAL_OWNER_OF', 'DIRECTOR_OF', 'DIRECTOR_RAMI_MAKHLOUF_OF', 'DIRECTOR_SHAREHOLDER_BENEFICIAL_OWNER_OF', 'DIRECTOR_SHAREHOLDER_OF', 'FIRST_BENEFICIARY_OF', 'GENERAL_ACCOUNTANT_OF', 'GRANTEE_OF_A_MORTGAGE_OF', 'INTERMEDIARY_OF', 'INVESTMENT_ADVISOR_OF', 'JOINT_SETTLOR_OF', 'LEGAL_ADVISOR_OF', 'MEMBER_OF_FOUNDATION_COUNCIL_OF', 'MEMBER_SHAREHOLDER_OF', 'NOMINATED_PERSON_OF', 'NOMINEE_BENEFICIAL_OWNER_OF', 'NOMINEE_BENEFICIARY_OF', 'NOMINEE_DIRECTOR_OF', 'NOMINEE_INVESTMENT_ADVISOR_OF', 'NOMINEE_NAME_OF', 'NOMINEE_PROTECTOR_OF', 'NOMINEE_SECRETARY_OF', 'NOMINEE_SHAREHOLDER_OF', 'NOMINEE_TRUST_SETTLOR_OF', 'OFFICER_OF', 'OWNER_DIRECTOR_AND_SHAREHOLDER_OF', 'OWNER_OF', 'PARTNER_OF', 'PERSONAL_DIRECTORSHIP_OF', 'POWER_OF_ATTORNEY_OF', 'POWER_OF_ATTORNEY_SHAREHOLDER_OF', 'PRESIDENT_AND_DIRECTOR_OF', 'PRESIDENT_DIRECTOR_OF', 'PRESIDENT_OF', 'PRINCIPAL_BENEFICIARY_OF', 'PROTECTOR_OF', 'RECORDS_REGISTERS_OF', 'REGISTERED_ADDRESS', 'REGISTER_OF_DIRECTOR_OF', 'REGISTER_OF_SHAREHOLDER_OF', 'RELATED_ENTITY', 'RESERVE_DIRECTOR_OF', 'RESIDENT_DIRECTOR_OF', 'SAFEKEEPING_OF', 'SAME_ADDRESS_AS', 'SAME_NAME_AND_REGISTRATION_DATE_AS', 'SECRETARY_OF', 'SHAREHOLDER_OF', 'SHAREHOLDER_THROUGH_JULEX_FOUNDATION_OF', 'SIGNATORY_OF', 'SIMILAR_NAME_AND_ADDRESS_AS', 'SOLE_SHAREHOLDER_OF', 'SOLE_SIGNATORY_BENEFICIAL_OWNER_OF', 'SOLE_SIGNATORY_OF', 'STOCKBROKER_OF', 'SUCCESSOR_PROTECTOR_OF', 'TAX_ADVISOR_OF', 'TREASURER_OF', 'TRUSTEE_OF_TRUST_OF', 'TRUST_SETTLOR_OF', 'UNIT_TRUST_REGISTER_OF', 'VICE_PRESIDENT_OF'})
Some simple searches on Officers.
# Substring search: print up to ten Officer names that contain `name`.
name='Smith'
officer_query = "MATCH (a:Officer) WHERE a.name CONTAINS '{}' RETURN a.name LIMIT 10".format(name)
for record in graph.run(officer_query):
    print(record)
('a.name': 'Derek Smith') ('a.name': 'Shane Micheal Smith') ('a.name': 'Howard James Smith') ('a.name': 'Michael Smith') ('a.name': 'Clive Chester Ambler-Smith & Jennifer Anne Ambler-Smith') ('a.name': 'Clive Chester Ambler-Smith+Jennifer Anne') ('a.name': 'Bruce & Lindsay Irvine-Smith') ('a.name': 'Clive Chester Amber Smith + Jennifer Anne Amber Smith') ('a.name': 'David Smith') ('a.name': 'Charles Jonathan Smith')
# Return the full record for officers whose name exactly matches the supplied name.
# The match is made case-insensitive by upper-casing both the stored name and
# the supplied name before comparing them.
exact_name='David Smith'
# Format exact_name (not the partial-search `name`) into the equality test.
for r in graph.run("MATCH (a:Officer) WHERE UPPER(a.name) ='{}' RETURN a LIMIT 10".format(exact_name.upper())):
    print(r)
('a': (c8d59aa:Officer {icij_id:"FFA4727CAD8680B2F2617CAF2E0E06F6",name:"Derek Smith",node_id:"12012101",sourceID:"Panama Papers",valid_until:"The Panama Papers data is current through 2015"}))
# Fetch one Officer node whose `name` property equals exact_name; it is kept in
# `p` because the relationship lookup in a later cell starts from it.
p=graph.find_one('Officer','name',exact_name)
# Bare expression so the notebook displays the node.
p
(f34eb96:Officer {countries:"Hong Kong",country_codes:"HKG",icij_id:"4F707CA3D79C9D37AB009C56AB40889E",name:"David Smith",node_id:"12101653",sourceID:"Panama Papers",valid_until:"The Panama Papers data is current through 2015"})
# Walk every relationship that starts at the officer node `p` and print, one per
# line: the relationship, its start node, its type, its end node, and a separator.
for rel in graph.match(start_node=p):
    print(rel, rel.start_node(), rel.type(), rel.end_node(), '----', sep='\n')
(f34eb96)-[:REGISTERED_ADDRESS]->(c91019d) (f34eb96:Officer {countries:"Hong Kong",country_codes:"HKG",icij_id:"4F707CA3D79C9D37AB009C56AB40889E",name:"David Smith",node_id:"12101653",sourceID:"Panama Papers",valid_until:"The Panama Papers data is current through 2015"}) REGISTERED_ADDRESS (c91019d:Address {address:"19/F; Shiu Fung Hong Building; 239-241 Wing Lok Street; Hong Kong",countries:"Hong Kong",country_codes:"HKG",icij_id:"7EF9C62B86C7A2042760F15FF7D43C75",node_id:"14006567",sourceID:"Panama Papers",valid_until:"The Panama Papers data is current through 2015"}) ---- (f34eb96)-[:SHAREHOLDER_OF]->(b0574b0) (f34eb96:Officer {countries:"Hong Kong",country_codes:"HKG",icij_id:"4F707CA3D79C9D37AB009C56AB40889E",name:"David Smith",node_id:"12101653",sourceID:"Panama Papers",valid_until:"The Panama Papers data is current through 2015"}) SHAREHOLDER_OF (b0574b0:Entity {address:"CAPITAL CORPORATE SERVICES LIMITED UNIT F; 7 FLOOR; CNT TOWER 338 HENNESSY ROAD WANCHAI; HONG KONG",countries:"Hong Kong",country_codes:"HKG",ibcRUC:"37837",incorporation_date:"10-DEC-1990",internal_id:"502746",jurisdiction:"BVI",jurisdiction_description:"British Virgin Islands",name:"VINA OSCAR HOTEL LIMITED",node_id:"10197522",original_name:"VINA OSCAR HOTEL LIMITED",service_provider:"Mossack Fonseca",sourceID:"Panama Papers",status:"Active",valid_until:"The Panama Papers data is current through 2015"}) ----
# Which entities is the person linked to through a SHAREHOLDER_OF relationship?
# The officer is selected by an exact name match, so exact_name is formatted in
# (the partial-search `name` value would not equal any full officer name).
q='''
MATCH (o:Officer), (e:Entity)
WHERE o.name='{}' AND (o)-[:SHAREHOLDER_OF]-(e)
RETURN o,e LIMIT 10
'''.format(exact_name)
for r in graph.run(q):
    print(r)
('o': (c8d59aa:Officer {icij_id:"FFA4727CAD8680B2F2617CAF2E0E06F6",name:"Derek Smith",node_id:"12012101",sourceID:"Panama Papers",valid_until:"The Panama Papers data is current through 2015"}), 'e': (e9ff007:Entity {address:"INFANTE & PEREZ ALMILLANO CL. 50 Y 74 SAN FRANCISCO; PH 909; PISOS 12 Y 14 APARTADO POSTAL 0830-00142; Z; PANAMA",countries:"Panama",country_codes:"PAN",incorporation_date:"09-JAN-2008",internal_id:"6036924",jurisdiction:"BVI",jurisdiction_description:"British Virgin Islands",name:"PORTONES DEL MAR INVESTORS INC.",node_id:"10208791",original_name:"PORTONES DEL MAR INVESTORS INC.",service_provider:"Mossack Fonseca",sourceID:"Panama Papers",status:"Active",valid_until:"The Panama Papers data is current through 2015"}))
# Fetch one Entity node whose `name` property equals exact_entity_name; it is
# kept in `e` because later cells query relationships and properties from it.
exact_entity_name='PORTONES DEL MAR INVESTORS INC.'
e=graph.find_one('Entity','name',exact_entity_name)
# Bare expression so the notebook displays the node.
e
(e9ff007:Entity {address:"INFANTE & PEREZ ALMILLANO CL. 50 Y 74 SAN FRANCISCO; PH 909; PISOS 12 Y 14 APARTADO POSTAL 0830-00142; Z; PANAMA",countries:"Panama",country_codes:"PAN",incorporation_date:"09-JAN-2008",internal_id:"6036924",jurisdiction:"BVI",jurisdiction_description:"British Virgin Islands",name:"PORTONES DEL MAR INVESTORS INC.",node_id:"10208791",original_name:"PORTONES DEL MAR INVESTORS INC.",service_provider:"Mossack Fonseca",sourceID:"Panama Papers",status:"Active",valid_until:"The Panama Papers data is current through 2015"})
# Print up to ten relationships whose end node is the entity `e`.
incoming = graph.match(end_node=e, limit=10)
for rel in incoming:
    print(rel)
(b70503d)-[:INTERMEDIARY_OF]->(e9ff007) (e6242b5)-[:SHAREHOLDER_OF]->(e9ff007) (ff1d840)-[:SHAREHOLDER_OF]->(e9ff007) (cfa8ea7)-[:SHAREHOLDER_OF]->(e9ff007) (db2f19c)-[:SHAREHOLDER_OF]->(e9ff007) (c59a7b6)-[:SHAREHOLDER_OF]->(e9ff007) (a06968a)-[:SHAREHOLDER_OF]->(e9ff007) (fb1cdaa)-[:SHAREHOLDER_OF]->(e9ff007) (f5d83c5)-[:SHAREHOLDER_OF]->(e9ff007) (c99c3f7)-[:SHAREHOLDER_OF]->(e9ff007)
# Look the entity up again through Cypher, keyed on its internal_id property.
iid=e['internal_id']
entity_query = "MATCH (a:Entity) WHERE a.internal_id ='{}' RETURN a LIMIT 10".format(iid)
for record in graph.run(entity_query):
    print(record)
('a': (e9ff007:Entity {address:"INFANTE & PEREZ ALMILLANO CL. 50 Y 74 SAN FRANCISCO; PH 909; PISOS 12 Y 14 APARTADO POSTAL 0830-00142; Z; PANAMA",countries:"Panama",country_codes:"PAN",incorporation_date:"09-JAN-2008",internal_id:"6036924",jurisdiction:"BVI",jurisdiction_description:"British Virgin Islands",name:"PORTONES DEL MAR INVESTORS INC.",node_id:"10208791",original_name:"PORTONES DEL MAR INVESTORS INC.",service_provider:"Mossack Fonseca",sourceID:"Panama Papers",status:"Active",valid_until:"The Panama Papers data is current through 2015"}))