In this document we have some LLM experiments using the DISK data.
from dotenv import load_dotenv
import os
import re
from time import sleep
from SPARQLWrapper import SPARQLWrapper, JSON
from langchain_community.graphs import Neo4jGraph
# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI
# Read connection settings for Fuseki, Neo4j and OpenAI from the local .env file.
load_dotenv('.env', override=True)
FUSEKI_URI, FUSEKI_USERNAME, FUSEKI_PASSWORD = (
    os.environ.get(key)
    for key in ('FUSEKI_URI', 'FUSEKI_USERNAME', 'FUSEKI_PASSWORD')
)
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD, NEO4J_DATABASE = (
    os.environ.get(key)
    for key in ('NEO4J_URI', 'NEO4J_USERNAME', 'NEO4J_PASSWORD', 'NEO4J_DATABASE')
)
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# SPARQL client for the Fuseki endpoint; credentials are only sent when both are configured.
sparql = SPARQLWrapper(FUSEKI_URI)
if FUSEKI_USERNAME and FUSEKI_PASSWORD:
    sparql.setCredentials(FUSEKI_USERNAME, FUSEKI_PASSWORD)
sparql.setReturnFormat(JSON)

# Neo4j client used to store the graph built from the SPARQL results.
neo4j = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

# Namespace prefixes prepended to every SPARQL query in this notebook.
PREFIXES = "\n".join([
    "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
])
# Fetch every question (IRI, label, template) from the EnigmaQuestions graph.
sparql.setQuery(PREFIXES + """
SELECT DISTINCT * FROM <https://raw.githubusercontent.com/KnowledgeCaptureAndDiscovery/QuestionOntology/main/development/1.3.1/EnigmaQuestions.xml> WHERE {
?QuestionId a <https://w3id.org/sqo#Question> ;
rdfs:label ?QuestionName ;
<https://w3id.org/sqo#hasQuestionTemplate> ?questionTemplate ;
}""")
results = sparql.query().convert()

# Question records indexed by question IRI; later cells look questions up here.
questions = {
    row['QuestionId']['value']: {
        'id': row['QuestionId']['value'],
        'name': row['QuestionName']['value'],
        'template': row['questionTemplate']['value'],
    }
    for row in results['results']['bindings']
}

# Upsert one Question node per record; properties are only written when the node is created.
add_query = """
MERGE(question:Question {id: $question.id})
ON CREATE SET
question.name = $question.name,
question.template = $question.template
RETURN question.name"""
for question in questions.values():
    print(neo4j.query(add_query, params={'question': question}))

# Enforce one Question node per id.
neo4j.query("""
CREATE CONSTRAINT unique_question IF NOT EXISTS
FOR (q:Question) REQUIRE q.id IS UNIQUE
""")
[{'question.name': 'Is the effect size of ?Genotype in ?Brain Imaging Trait associated with ?Demographic Attribute?'}] [{'question.name': 'Is ?Brain Characteristic associated with ?Neurological Disorder in comparison to healthy controls?'}] [{'question.name': 'Is the effect size of ?Genotype on ?Brain Imaging Trait of ?Region associated with ?Demographic Attribute?'}] [{'question.name': 'What is the effect size of ?Genotype on ?Region ?Brain Imaging Trait?'}] [{'question.name': 'Is the effect size of ?Genotype on ?Brain Imaging Trait of ?Region associated with ?Demographic Attribute for cohorts groups filtered by ?Criterion for ?Value?'}] [{'question.name': 'What is the effect size of ?Genotype on ?Region ?Brain Imaging Trait for cohorts groups filtered by ?Criterion for ?Value?'}]
[]
# DELETE:
#neo4j.query("""MATCH (q:Question) DELETE q""")
# Goals: read every Goal from the DISK graph and mirror it into Neo4j,
# linking each goal to the Question it instantiates.
sparql.setQuery(PREFIXES + """
SELECT DISTINCT * FROM <http://localhost:8080/disk-project-server> WHERE {
?GoalId a <http://disk-project.org/ontology/disk#Goal> ;
<http://www.w3.org/2000/01/rdf-schema#label> ?GoalName ;
<http://www.w3.org/2000/01/rdf-schema#comment> ?Description ;
<http://disk-project.org/ontology/disk#dateCreated> ?Created ;
<http://disk-project.org/ontology/disk#hasQuestion> ?QuestionId .
optional { ?GoalId <http://disk-project.org/ontology/disk#dateModified> ?Modified }
}
""")
goal_results = sparql.query().convert()

# Goal records indexed by goal IRI; each keeps the IRI of its question in 'question_id'.
goals = {}
for item in goal_results['results']['bindings']:
    goal_id = item['GoalId']['value']
    goal = {
        'id': goal_id,
        'name': item['GoalName']['value'],
        'description': item['Description']['value'],
        'date_created': item['Created']['value'],
        'question_id': item['QuestionId']['value'],
    }
    # ?Modified is OPTIONAL, so the key is absent when unbound. Binding keys
    # carry the variable name exactly as written in the query ('Modified').
    if 'Modified' in item:
        goal['date_modified'] = item['Modified']['value']
    goals[goal_id] = goal

# Properties are only written when the node is first created.
add_query = """
MERGE(goal:Goal {id: $goal.id})
ON CREATE SET
goal.name = $goal.name,
goal.description = $goal.description,
goal.date_created = $goal.date_created,
goal.date_modified = $goal.date_modified
RETURN goal.name"""
merge_query = """
MATCH (q:Question {id: $question.id}), (g:Goal {id: $goal.id})
MERGE (g)-[relationship:hasQuestion]->(q)
RETURN relationship"""
for goal in goals.values():
    print(neo4j.query(add_query, params={'goal': goal}))
    # A goal may reference a question that was not loaded; keep the node
    # and skip the relationship instead of aborting the whole import.
    question = questions.get(goal['question_id'])
    if question is None:
        print("Question not found for goal " + goal['id'])
        continue
    print(neo4j.query(merge_query, params={'question': question, 'goal': goal}))

neo4j.query("""CREATE CONSTRAINT unique_goal IF NOT EXISTS FOR (g:Goal) REQUIRE g.id IS UNIQUE""")
[{'goal.name': "Is the effect size of the association between SNP rs1080066 and the Surface Area of the Precentral gyrus associated with a cohort's mean age in cohorts of European Ancestry?"}] [{'relationship': ({}, 'hasQuestion', {})}] [{'goal.name': 'What is the Effect Size of rs1080066 on Precental Cortex Surface Area for cohorts groups of European ancestry'}] [{'relationship': ({}, 'hasQuestion', {})}]
[]
# Question variable bindings: for every goal, copy its bindings into Neo4j
# as Binding nodes attached through hasQuestionBinding.
add_binding_query = """
MERGE(b:Binding {id: $binding.id})
ON CREATE SET
b.variable = $binding.variable,
b.value = $binding.value,
b.type = $binding.type
RETURN b.id"""
merge_goal_binding_query = """
MATCH (goal:Goal {id: $goal.id}), (binding:Binding {id: $binding.id})
MERGE (goal)-[relationship:hasQuestionBinding]->(binding)
RETURN relationship"""
for goal in goals.values():
    # One query per goal, restricted to that goal through the VALUES clause.
    sparql.setQuery(PREFIXES + """
SELECT DISTINCT ?Binding ?Variable ?Value ?Type FROM <http://localhost:8080/disk-project-server> WHERE {
?Goal <http://disk-project.org/ontology/disk#hasQuestionBindings> ?Binding .
?Binding <http://disk-project.org/ontology/disk#hasBindingVariable> ?Variable ;
<http://disk-project.org/ontology/disk#hasBindingValue> ?Value ;
<http://disk-project.org/ontology/disk#hasType> ?Type .
VALUES ?Goal { <""" + goal['id'] + "> }}")
    rows = sparql.query().convert()['results']['bindings']
    for row in rows:
        binding = {
            'id': row['Binding']['value'],
            'variable': row['Variable']['value'],
            'value': row['Value']['value'],
            'type': row['Type']['value'],
        }
        print(neo4j.query(add_binding_query, params={'binding': binding}))
        print(neo4j.query(merge_goal_binding_query, params={'binding': binding, 'goal': goal}))

neo4j.query("""CREATE CONSTRAINT unique_binding IF NOT EXISTS FOR (l:Binding) REQUIRE l.id IS UNIQUE""")
[{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-XBkQmDYmJAn0/bindings/genotype-Ky8zLkhrWpVO'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-XBkQmDYmJAn0/bindings/brainImagingTrait-SUjM0MeseHxB'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-XBkQmDYmJAn0/bindings/CriterionValue-Y13ptyhmjJd6'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-XBkQmDYmJAn0/bindings/demographicAttribute-Y5l3ZuoYITux'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-XBkQmDYmJAn0/bindings/brainRegion-AUfzTg0TjlL4'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-XBkQmDYmJAn0/bindings/Criterion-1GtBXBML6uw3'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-7l4AS1WcMhyh/bindings/genotype-jqrCiLBJ2zji'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-7l4AS1WcMhyh/bindings/brainRegion-jVSd824Nlovz'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-7l4AS1WcMhyh/bindings/brainImagingTrait-TrONS5Jvmpgv'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-7l4AS1WcMhyh/bindings/Criterion-Qvoq1zG8ZWr3'}] [{'relationship': ({}, 'hasQuestionBinding', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/goals/Goal-7l4AS1WcMhyh/bindings/CriterionValue-qQr8gCuTtMA8'}] [{'relationship': ({}, 'hasQuestionBinding', {})}]
[]
# DELETE:
#neo4j.query("""MATCH (g:Goal) DELETE g""")
#neo4j.query("""MATCH (g:Goal)-[relationship:hasQuestion]->(q:Question) DELETE relationship""")
# Lines of Inquiry: read every LOI (with its data-query template) from the
# DISK graph and mirror it into Neo4j, linked to its Question.
sparql.setQuery(PREFIXES + """
SELECT DISTINCT * FROM <http://localhost:8080/disk-project-server> WHERE {
?LOI a <http://disk-project.org/ontology/disk#LineOfInquiry> ;
<http://www.w3.org/2000/01/rdf-schema#label> ?Name ;
<http://www.w3.org/2000/01/rdf-schema#comment> ?Description ;
<http://disk-project.org/ontology/disk#hasQuestion> ?QuestionId ;
<http://disk-project.org/ontology/disk#dateCreated> ?Created ;
<http://disk-project.org/ontology/disk#hasMetaWorkflowSeed> ?Seed ;
<http://disk-project.org/ontology/disk#hasDataQuery> ?DataQuery .
?DataQuery <http://disk-project.org/ontology/disk#hasQueryTemplate> ?QueryTemplate .
optional { ?LOI <http://disk-project.org/ontology/disk#dateModified> ?Modified }
}""")
loi_results = sparql.query().convert()

# LOI records indexed by LOI IRI; each keeps the IRI of its question in 'question_id'.
lois = {}
for item in loi_results['results']['bindings']:
    loi_id = item['LOI']['value']
    loi = {
        'id': loi_id,
        'name': item['Name']['value'],
        'description': item['Description']['value'],
        'date_created': item['Created']['value'],
        'question_id': item['QuestionId']['value'],
        'query_template': item['QueryTemplate']['value'],
        # Only the seed IRI is kept here; the seed itself is loaded in a later step.
        'seed_id': item['Seed']['value'],
    }
    # ?Modified is OPTIONAL, so the key is absent when unbound. Binding keys
    # carry the variable name exactly as written in the query ('Modified').
    if 'Modified' in item:
        loi['date_modified'] = item['Modified']['value']
    lois[loi_id] = loi

# Properties are only written when the node is first created.
add_loi_query = """
MERGE(loi:LineOfInquiry {id: $loi.id})
ON CREATE SET
loi.name = $loi.name,
loi.description = $loi.description,
loi.date_created = $loi.date_created,
loi.date_modified = $loi.date_modified,
loi.query_template = $loi.query_template
RETURN loi.name"""
merge_loi_query = """
MATCH (q:Question {id: $question.id}), (loi:LineOfInquiry {id: $loi.id})
MERGE (loi)-[relationship:hasQuestion]->(q)
RETURN relationship"""
for loi in lois.values():
    print(neo4j.query(add_loi_query, params={'loi': loi}))
    # An LOI may reference a question that was not loaded; keep the node
    # and skip the relationship instead of aborting the whole import.
    question = questions.get(loi['question_id'])
    if question is None:
        print("Question not found for line of inquiry " + loi['id'])
        continue
    print(neo4j.query(merge_loi_query, params={'question': question, 'loi': loi}))

neo4j.query("""CREATE CONSTRAINT unique_loi IF NOT EXISTS FOR (l:LineOfInquiry) REQUIRE l.id IS UNIQUE""")
[{'loi.name': 'Meta regression with a filter'}] [{'relationship': ({}, 'hasQuestion', {})}] [{'loi.name': 'Meta analysis'}] [{'relationship': ({}, 'hasQuestion', {})}]
[]
# Workflow seeds: for every LOI, load the label and comment of its seed and
# store it in Neo4j as a WorkflowSeed node attached through hasWorkflowSeed.
#
# The seed record carries 'id', 'name' and 'description', so those are the
# properties written. Because of ON CREATE SET, WorkflowSeed nodes that
# already exist are left as they are; delete them first to backfill.
add_seed_query = """
MERGE(wfs:WorkflowSeed {id: $seed.id})
ON CREATE SET
wfs.name = $seed.name,
wfs.description = $seed.description
RETURN wfs.id"""
merge_loi_seed_query = """
MATCH (loi:LineOfInquiry {id: $loi.id}), (seed:WorkflowSeed {id: $seed.id})
MERGE (loi)-[relationship:hasWorkflowSeed]->(seed)
RETURN relationship"""

# Seed records indexed by seed IRI; the seed-binding step iterates over this.
seeds = {}
for loi in lois.values():
    seed_id = loi['seed_id']
    sparql.setQuery(PREFIXES + """
SELECT DISTINCT * FROM <http://localhost:8080/disk-project-server> WHERE {
?Seed <http://www.w3.org/2000/01/rdf-schema#label> ?Name ;
<http://www.w3.org/2000/01/rdf-schema#comment> ?Description ;
VALUES ?Seed { <""" + seed_id + "> }}")
    rows = sparql.query().convert()['results']['bindings']
    if not rows:
        # No label/comment found for this seed: nothing to store.
        continue
    item = rows[0]
    seed = {
        'id': seed_id,
        'name': item['Name']['value'],
        'description': item['Description']['value'],
    }
    seeds[seed_id] = seed
    print(neo4j.query(add_seed_query, params={'seed': seed}))
    print(neo4j.query(merge_loi_seed_query, params={'seed': seed, 'loi': loi}))

neo4j.query("""CREATE CONSTRAINT unique_seed IF NOT EXISTS FOR (l:WorkflowSeed) REQUIRE l.id IS UNIQUE""")
[{'wfs.id': 'http://localhost:8080/disk-project-server/lois/LOI-Q7zw0HsrUwwD/seeds/-wbAoiLMRuJ3f'}] [{'relationship': ({}, 'hasWorkflowSeed', {})}] [{'wfs.id': 'http://localhost:8080/disk-project-server/lois/LOI-pEJwIhcWNTDS/seeds/-cTAZIDs8XfzM'}] [{'relationship': ({}, 'hasWorkflowSeed', {})}]
[]
# Seed bindings: attach each workflow seed's parameter and input bindings
# as Binding nodes (hasParameter / hasInput relationships).
merge_seed_parameter_query = """
MATCH (seed:WorkflowSeed {id: $seed.id}), (binding:Binding {id: $binding.id})
MERGE (seed)-[relationship:hasParameter]->(binding)
RETURN relationship"""
merge_seed_input_query = """
MATCH (seed:WorkflowSeed {id: $seed.id}), (binding:Binding {id: $binding.id})
MERGE (seed)-[relationship:hasInput]->(binding)
RETURN relationship"""

# DISK property name -> Cypher statement that creates the matching relationship.
seed_link_queries = {
    'hasParameter': merge_seed_parameter_query,
    'hasInput': merge_seed_input_query,
}
for seed in seeds.values():
    seed_id = seed['id']
    for disk_property, link_query in seed_link_queries.items():
        sparql_query = PREFIXES + """
SELECT DISTINCT * FROM <http://localhost:8080/disk-project-server> WHERE {
?Seed <http://disk-project.org/ontology/disk#""" + disk_property + """> ?Binding .
?Binding <http://disk-project.org/ontology/disk#hasBindingVariable> ?Variable ;
<http://disk-project.org/ontology/disk#hasBindingValue> ?Value ;
<http://disk-project.org/ontology/disk#hasType> ?Type .
VALUES ?Seed { <""" + seed_id + "> }}"
        sparql.setQuery(sparql_query)
        rows = sparql.query().convert()['results']['bindings']
        for row in rows:
            binding = {
                'id': row['Binding']['value'],
                'variable': row['Variable']['value'],
                'value': row['Value']['value'],
                'type': row['Type']['value'],
            }
            print(neo4j.query(add_binding_query, params={'binding': binding}))
            print(neo4j.query(link_query, params={'binding': binding, 'seed': seed}))
[{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-Q7zw0HsrUwwD/seeds/-wbAoiLMRuJ3f/bindings/area-S2u4Gd3XADjg'}] [{'relationship': ({}, 'hasParameter', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-Q7zw0HsrUwwD/seeds/-wbAoiLMRuJ3f/bindings/demographic-XfwdBufvw0y2'}] [{'relationship': ({}, 'hasParameter', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-Q7zw0HsrUwwD/seeds/-wbAoiLMRuJ3f/bindings/demographic_value-HncGxEoq2MIs'}] [{'relationship': ({}, 'hasParameter', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-Q7zw0HsrUwwD/seeds/-wbAoiLMRuJ3f/bindings/snp-YCVVj9h2OlNr'}] [{'relationship': ({}, 'hasParameter', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-Q7zw0HsrUwwD/seeds/-wbAoiLMRuJ3f/bindings/trait-myGIDu70W593'}] [{'relationship': ({}, 'hasParameter', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-Q7zw0HsrUwwD/seeds/-wbAoiLMRuJ3f/bindings/cohortData-uaGNGjoUbclR'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-pEJwIhcWNTDS/seeds/-cTAZIDs8XfzM/bindings/area-O8o2iLLy8xAa'}] [{'relationship': ({}, 'hasParameter', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-pEJwIhcWNTDS/seeds/-cTAZIDs8XfzM/bindings/snp-sOLb1IxtU9CP'}] [{'relationship': ({}, 'hasParameter', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-pEJwIhcWNTDS/seeds/-cTAZIDs8XfzM/bindings/trait-EoNSTe2xROPj'}] [{'relationship': ({}, 'hasParameter', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-pEJwIhcWNTDS/seeds/-cTAZIDs8XfzM/bindings/demographic_value-E7sbHmUfAS54'}] [{'relationship': ({}, 'hasParameter', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/lois/LOI-pEJwIhcWNTDS/seeds/-cTAZIDs8XfzM/bindings/cohortData-yBhLXX1ggMJW'}] [{'relationship': ({}, 'hasInput', {})}]
# Triggered Lines of Inquiry: read every TLOI (with its query results) from
# the DISK graph and mirror it into Neo4j, linked to its Question, LOI and Goal.
sparql.setQuery(PREFIXES + """
SELECT DISTINCT * FROM <http://localhost:8080/disk-project-server> WHERE {
?TLOI a <http://disk-project.org/ontology/disk#TriggeredLineOfInquiry> ;
<http://www.w3.org/2000/01/rdf-schema#label> ?Name ;
<http://www.w3.org/2000/01/rdf-schema#comment> ?Description ;
<http://disk-project.org/ontology/disk#hasQuestion> ?QuestionId ;
<http://disk-project.org/ontology/disk#hasLineOfInquiry> ?LoiId ;
<http://disk-project.org/ontology/disk#hasGoal> ?GoalId ;
<http://disk-project.org/ontology/disk#dateCreated> ?Created ;
<http://disk-project.org/ontology/disk#hasMetaWorkflowInstantiation> ?Inst ;
<http://disk-project.org/ontology/disk#hasStatus> ?Status;
<http://disk-project.org/ontology/disk#hasQueryResults> ?QueryResults .
?QueryResults <http://disk-project.org/ontology/disk#hasQuery> ?Query ;
<http://disk-project.org/ontology/disk#hasQueryTemplate> ?QueryTemplate ;
<http://disk-project.org/ontology/disk#hasResult> ?Result
optional { ?TLOI <http://disk-project.org/ontology/disk#dateModified> ?Modified }
}""")
results = sparql.query().convert()

# TLOI records indexed by TLOI IRI; each keeps the IRIs of its question, LOI,
# goal and workflow instantiation so later steps can resolve them.
tlois = {}
for item in results['results']['bindings']:
    tloi_id = item['TLOI']['value']
    tloi = {
        'id': tloi_id,
        'name': item['Name']['value'],
        'description': item['Description']['value'],
        'date_created': item['Created']['value'],
        'question_id': item['QuestionId']['value'],
        'loi_id': item['LoiId']['value'],
        'goal_id': item['GoalId']['value'],
        'status': item['Status']['value'],
        'inst': item['Inst']['value'],
        'query_template': item['QueryTemplate']['value'],
        'query': item['Query']['value'],
        'query_response': item['Result']['value'],
    }
    # ?Modified is OPTIONAL, so the key is absent when unbound. Binding keys
    # carry the variable name exactly as written in the query ('Modified').
    if 'Modified' in item:
        tloi['date_modified'] = item['Modified']['value']
    tlois[tloi_id] = tloi

# Properties are only written when the node is first created.
add_tloi_query = """
MERGE(tloi:TriggeredLineOfInquiry {id: $tloi.id})
ON CREATE SET
tloi.name = $tloi.name,
tloi.description = $tloi.description,
tloi.source = $tloi.id,
tloi.status = $tloi.status,
tloi.date_created = $tloi.date_created,
tloi.date_modified = $tloi.date_modified,
tloi.query = $tloi.query,
tloi.query_template = $tloi.query_template,
tloi.query_response = $tloi.query_response
RETURN tloi.name"""
merge_tloi_question_query = """
MATCH (q:Question {id: $question.id}), (tloi:TriggeredLineOfInquiry {id: $tloi.id})
MERGE (tloi)-[relationship:hasQuestion]->(q)
RETURN relationship"""
merge_tloi_loi_query = """
MATCH (loi:LineOfInquiry {id: $loi.id}), (tloi:TriggeredLineOfInquiry {id: $tloi.id})
MERGE (tloi)-[relationship:hasLineOfInquiry]->(loi)
RETURN relationship"""
merge_tloi_goal_query = """
MATCH (g:Goal {id: $goal.id}), (tloi:TriggeredLineOfInquiry {id: $tloi.id})
MERGE (tloi)-[relationship:hasGoal]->(g)
RETURN relationship"""

# (link statement, Cypher parameter name, loaded records, TLOI field holding the IRI)
tloi_links = (
    (merge_tloi_question_query, 'question', questions, 'question_id'),
    (merge_tloi_loi_query, 'loi', lois, 'loi_id'),
    (merge_tloi_goal_query, 'goal', goals, 'goal_id'),
)
for tloi in tlois.values():
    print(neo4j.query(add_tloi_query, params={'tloi': tloi}))
    for link_query, param_name, loaded, id_field in tloi_links:
        # A TLOI can point at a goal/LOI/question that was not loaded (e.g. a
        # goal missing from the goal query). Skip only that relationship so
        # one dangling reference does not abort the whole import.
        target = loaded.get(tloi[id_field])
        if target is None:
            print("Skipping " + param_name + " link for " + tloi['id']
                  + ": " + tloi[id_field] + " not loaded")
            continue
        print(neo4j.query(link_query, params={param_name: target, 'tloi': tloi}))

neo4j.query("""CREATE CONSTRAINT unique_tloi IF NOT EXISTS FOR (l:TriggeredLineOfInquiry) REQUIRE l.id IS UNIQUE""")
[{'tloi.name': 'Triggered: Meta regression with ancestry'}] [{'relationship': ({}, 'hasQuestion', {})}] [{'relationship': ({}, 'hasLineOfInquiry', {})}] [{'relationship': ({}, 'hasGoal', {})}] [{'tloi.name': 'Triggered: Meta regression with ancestry'}] [{'relationship': ({}, 'hasQuestion', {})}] [{'relationship': ({}, 'hasLineOfInquiry', {})}] [{'relationship': ({}, 'hasGoal', {})}] [{'tloi.name': 'Triggered: Meta regression with ancestry'}] [{'relationship': ({}, 'hasQuestion', {})}] [{'relationship': ({}, 'hasLineOfInquiry', {})}] [{'relationship': ({}, 'hasGoal', {})}] [{'tloi.name': 'Triggered: Meta regression with a filter'}] [{'relationship': ({}, 'hasQuestion', {})}] [{'relationship': ({}, 'hasLineOfInquiry', {})}] [{'relationship': ({}, 'hasGoal', {})}] [{'tloi.name': 'Triggered: Meta regression with a filter'}] [{'relationship': ({}, 'hasQuestion', {})}] [{'relationship': ({}, 'hasLineOfInquiry', {})}] [{'relationship': ({}, 'hasGoal', {})}] [{'tloi.name': 'Triggered: Meta regression with a filter'}] [{'relationship': ({}, 'hasQuestion', {})}] [{'relationship': ({}, 'hasLineOfInquiry', {})}] [{'relationship': ({}, 'hasGoal', {})}] [{'tloi.name': 'Triggered: Meta regression with a filter'}] [{'relationship': ({}, 'hasQuestion', {})}] [{'relationship': ({}, 'hasLineOfInquiry', {})}] [{'relationship': ({}, 'hasGoal', {})}]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Cell In[22], line 33 31 q = questions[tlois[tloi]['question_id']] 32 loi = lois[tlois[tloi]['loi_id']] ---> 33 goal = goals[tlois[tloi]['goal_id']] 34 r = neo4j.query(add_tloi_query, params={'tloi':tlois[tloi]}) 35 print(r) KeyError: 'http://localhost:8080/disk-project-server/goals/Goal-DVcUWW5xZFXX'
# Workflow instantiations: for every loaded TLOI, load its workflow
# instantiation and store it in Neo4j attached through hasWorkflowInstantiation.
add_inst_query = """
MERGE(inst:WorkflowInstantiation {id: $inst.id})
ON CREATE SET
inst.name = $inst.name,
inst.description = $inst.description,
inst.status = $inst.status,
inst.workflow_link = $inst.workflow_link
RETURN inst.name"""
merge_tloi_inst_query = """
MATCH (wfc:WorkflowInstantiation {id: $inst.id}), (tloi:TriggeredLineOfInquiry {id: $tloi.id})
MERGE (tloi)-[relationship:hasWorkflowInstantiation]->(wfc)
RETURN relationship"""

# Instantiation records indexed by instantiation IRI; each keeps the IRI of
# its execution in 'execution' for the execution-loading step.
instantiations = {}
for tloi in tlois.values():
    inst_id = tloi['inst']
    sparql.setQuery(PREFIXES + """
SELECT DISTINCT * FROM <http://localhost:8080/disk-project-server> WHERE {
?I <http://www.w3.org/2000/01/rdf-schema#label> ?Name ;
<http://www.w3.org/2000/01/rdf-schema#comment> ?Description ;
<http://disk-project.org/ontology/disk#hasWorkflow> ?WorkflowLink;
<http://disk-project.org/ontology/disk#hasExecution> ?Execution .
OPTIONAL {?I <http://disk-project.org/ontology/disk#hasStatus> ?Status}
VALUES ?I {<""" + inst_id + ">}}")
    rows = sparql.query().convert()['results']['bindings']
    if not rows:
        print("Workflow instantiation not found for " + inst_id)
        continue
    # Only the first result row is used.
    item = rows[0]
    instantiation = {
        'id': inst_id,
        'name': item['Name']['value'],
        'description': item['Description']['value'],
        'workflow_link': item['WorkflowLink']['value'],
        'execution': item['Execution']['value'],
    }
    # ?Status is OPTIONAL: check the result row (not the record being built)
    # for the binding, using the variable name as written in the query.
    if 'Status' in item:
        instantiation['status'] = item['Status']['value']
    instantiations[inst_id] = instantiation
    print(neo4j.query(add_inst_query, params={'inst': instantiation}))
    print(neo4j.query(merge_tloi_inst_query, params={'inst': instantiation, 'tloi': tloi}))

neo4j.query("""CREATE CONSTRAINT unique_inst IF NOT EXISTS FOR (l:WorkflowInstantiation) REQUIRE l.id IS UNIQUE""")
[{'inst.name': 'Meta-Regression'}] [{'relationship': ({}, 'hasWorkflowInstantiation', {})}] [{'inst.name': 'Meta-Regression'}] [{'relationship': ({}, 'hasWorkflowInstantiation', {})}] [{'inst.name': 'Meta-Regression'}] [{'relationship': ({}, 'hasWorkflowInstantiation', {})}] [{'inst.name': 'Meta-Regression'}] [{'relationship': ({}, 'hasWorkflowInstantiation', {})}] [{'inst.name': 'Meta-Regression'}] [{'relationship': ({}, 'hasWorkflowInstantiation', {})}] [{'inst.name': 'Meta-Regression'}] [{'relationship': ({}, 'hasWorkflowInstantiation', {})}] [{'inst.name': 'Meta-Regression'}] [{'relationship': ({}, 'hasWorkflowInstantiation', {})}] [{'inst.name': 'Meta-Regression'}] []
[]
# Load bindings (parameters, inputs, outputs) for workflow instantiations -- currently disabled
#merge_inst_parameter_query = """
#MATCH (inst:WorkflowInstantiation {id: $inst.id}), (binding:Binding {id: $binding.id})
#MERGE (inst)-[relationship:hasParameter]->(binding)
#RETURN relationship"""
#merge_inst_input_query = """
#MATCH (inst:WorkflowInstantiation {id: $inst.id}), (binding:Binding {id: $binding.id})
#MERGE (inst)-[relationship:hasInput]->(binding)
#RETURN relationship"""
#merge_inst_output_query = """
#MATCH (inst:WorkflowInstantiation {id: $inst.id}), (binding:Binding {id: $binding.id})
#MERGE (inst)-[relationship:hasOutput]->(binding)
#RETURN relationship"""
#
#for inst_id in instantiations:
# inst = instantiations[inst_id]
# for t in ['hasParameter', 'hasInput', 'hasOutput']:
# sparql_query = PREFIXES + """
#SELECT DISTINCT * FROM <http://localhost:8080/disk-project-server> WHERE {
# ?Inst <http://disk-project.org/ontology/disk#""" + t + """> ?Binding .
# ?Binding <http://disk-project.org/ontology/disk#hasBindingVariable> ?Variable ;
# <http://disk-project.org/ontology/disk#hasBindingValue> ?Value ;
# <http://disk-project.org/ontology/disk#hasType> ?Type .
# VALUES ?Inst { <""" + inst_id + "> }}"
# sparql.setQuery(sparql_query)
# r = sparql.query().convert()
# for item in r['results']['bindings']:
# binding = {}
# binding['id'] = item['Binding']['value']
# binding['variable'] = item['Variable']['value']
# binding['value'] = item['Value']['value']
# binding['type'] = item['Type']['value']
# out = neo4j.query(add_binding_query, params={'binding':binding})
# print(out)
# if t == 'hasParameter':
# out = neo4j.query(merge_inst_parameter_query, params={'binding':binding, 'inst':inst})
# elif t == 'hasInput':
# out = neo4j.query(merge_inst_input_query, params={'binding':binding, 'inst':inst})
# else:
# out = neo4j.query(merge_inst_output_query, params={'binding':binding, 'inst':inst})
# print(out)
# Executions: for every loaded workflow instantiation, load its execution
# (run dates and confidence result) and attach it through hasExecution.
add_exec_query = """
MERGE(exec:Execution {id: $exec.id})
ON CREATE SET
exec.confidence_type = $exec.confidence_type,
exec.confidence_value = toFloat($exec.confidence_value),
exec.date_start = $exec.start_date,
exec.date_end = $exec.end_date
RETURN exec.confidence_value"""
merge_exec_inst_query = """
MATCH (wfc:WorkflowInstantiation {id: $inst.id}), (exec:Execution {id: $exec.id})
MERGE (wfc)-[relationship:hasExecution]->(exec)
RETURN relationship"""

# Execution records indexed by execution IRI.
executions = {}
for inst in instantiations.values():
    exec_id = inst['execution']
    sparql.setQuery(PREFIXES + """
SELECT DISTINCT * FROM <http://localhost:8080/disk-project-server> WHERE {
?Execution <http://disk-project.org/ontology/disk#hasRunStartDate> ?StartDate ;
<http://disk-project.org/ontology/disk#hasRunEndDate> ?EndDate ;
<http://disk-project.org/ontology/disk#hasResult> ?Result .
?Result <http://disk-project.org/ontology/disk#hasConfidenceType> ?ConfidenceType .
?Result <http://disk-project.org/ontology/disk#hasConfidenceValue> ?ConfidenceValue
VALUES ?Execution {<""" + exec_id + ">}}")
    rows = sparql.query().convert()['results']['bindings']
    if not rows:
        # Executions without a matching result row are skipped.
        continue
    first = rows[0]
    execution = {
        'id': exec_id,
        'start_date': first['StartDate']['value'],
        'end_date': first['EndDate']['value'],
        'confidence_type': first['ConfidenceType']['value'],
        'confidence_value': first['ConfidenceValue']['value'],
    }
    executions[exec_id] = execution
    print(neo4j.query(add_exec_query, params={'exec': execution}))
    print(neo4j.query(merge_exec_inst_query, params={'inst': inst, 'exec': execution}))

neo4j.query("""CREATE CONSTRAINT unique_exec IF NOT EXISTS FOR (l:Execution) REQUIRE l.id IS UNIQUE""")
[{'exec.confidence_value': '0.263176970715283e0'}] [{'relationship': ({}, 'hasExecution', {})}] [{'exec.confidence_value': '0.044376539244712e0'}] [{'relationship': ({}, 'hasExecution', {})}] [{'exec.confidence_value': '0.37155457478486e0'}] [{'relationship': ({}, 'hasExecution', {})}] [{'exec.confidence_value': '0.500060579258888e0'}] [{'relationship': ({}, 'hasExecution', {})}] [{'exec.confidence_value': '0.020618776795934e0'}] [{'relationship': ({}, 'hasExecution', {})}] [{'exec.confidence_value': '0.0828042022392925e0'}] [{'relationship': ({}, 'hasExecution', {})}] [{'exec.confidence_value': '0.018198423339433e0'}] [{'relationship': ({}, 'hasExecution', {})}]
[]
# Execution files: attach each execution's input and output file bindings
# as Binding nodes (hasInput / hasOutput relationships).
merge_exec_input_query = """
MATCH (exec:Execution {id: $exec.id}), (binding:Binding {id: $binding.id})
MERGE (exec)-[relationship:hasInput]->(binding)
RETURN relationship"""
merge_exec_output_query = """
MATCH (exec:Execution {id: $exec.id}), (binding:Binding {id: $binding.id})
MERGE (exec)-[relationship:hasOutput]->(binding)
RETURN relationship"""

# DISK property name -> Cypher statement that creates the matching relationship.
exec_link_queries = {
    'hasInputFile': merge_exec_input_query,
    'hasOutputFile': merge_exec_output_query,
}
for exec_id, ex in executions.items():
    for disk_property, link_query in exec_link_queries.items():
        sparql_query = PREFIXES + """
SELECT DISTINCT * FROM <http://localhost:8080/disk-project-server> WHERE {
?Exec <http://disk-project.org/ontology/disk#""" + disk_property + """> ?Binding .
?Binding <http://disk-project.org/ontology/disk#hasBindingVariable> ?Variable ;
<http://disk-project.org/ontology/disk#hasBindingValue> ?Value ;
<http://disk-project.org/ontology/disk#hasType> ?Type .
VALUES ?Exec { <""" + exec_id + "> }}"
        sparql.setQuery(sparql_query)
        rows = sparql.query().convert()['results']['bindings']
        for row in rows:
            binding = {
                'id': row['Binding']['value'],
                'variable': row['Variable']['value'],
                'value': row['Value']['value'],
                'type': row['Type']['value'],
            }
            print(neo4j.query(add_binding_query, params={'binding': binding}))
            print(neo4j.query(link_query, params={'binding': binding, 'exec': ex}))
[{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/snp-9KsH6OCKGJ2A'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/demographic_min-6JIJEK1lGoDh'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/trait-feW04T9gR5jg'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/demographic_value-hF9GLnTTKMZM'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/demographic-xDaSSRJlqbS1'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/area-6T51xYmHFFxR'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/demographic_max-iT9J5wYjRn3o'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/cohortData-YQ8q3TKzaI7I'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/log-s94xrVsoOigf'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/p_value-wZu4AXbGKEYB'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 
'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/brain_visualization-zOghWK6hUkKJ'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ/instantiations/-LbRnRP91gcXu/bindings/scatter-MDjiGB8P4yjF'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/demographic_min-oxOStdKAt1dE'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/demographic_value-LdhTYw8xDNBY'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/trait-M7E9bN3vYEFv'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/demographic_max-s3L4rbFeez0i'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/snp-80tnHu1faEGt'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/demographic-gRDSowEiqgPe'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/area-4XH0gGPY5CvH'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/cohortData-Z3R0oBSm16S1'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 
'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/p_value-pD7IWFEuqSTe'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/brain_visualization-3VCoEDF4zFUw'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/log-vj0VdhZzRVOu'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K/instantiations/-wnFyzaHf3k0J/bindings/scatter-JEUNrxvpmfYP'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/snp-kMVnCHANGvE5'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/demographic_max-d4kucGEBkjsr'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/demographic-BStAnJDxMQbf'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/trait-i0uvie4Gtuwq'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/demographic_value-mqx4OKfPBgZl'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/area-zwtmMApbBJhV'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 
'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/demographic_min-AbyjZ9Qu2tk0'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/cohortData-UtzgYethtUEU'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/scatter-giJqZho6kb5c'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/brain_visualization-X8CJAGKSurYQ'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/log-S5bfgKUHywEi'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3/instantiations/-p4T22wUh71sC/bindings/p_value-SzC3WykNYeLH'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/demographic-g21Y3aVY3cdO'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/demographic_max-G3AjR1eeYJm0'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/area-oxmoaknioW25'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/snp-SXb5dajUhbiJ'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 
'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/demographic_min-qJF5YPuXPte7'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/trait-lfXqxGC6soHU'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/demographic_value-fJSQqBneGwNo'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/cohortData-HpJZKtSftdz8'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/brain_visualization-gGDKpHWrU7JW'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/log-qwW4Ya7olyNi'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/scatter-mQTfSnrytq9J'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM/instantiations/-DTq9uoBEMkgm/bindings/p_value-O0gmn75rRATo'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/demographic_value-0bPim8oy48hf'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/snp-uTvuspMuRvjn'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 
'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/demographic_min-K2n1FIlEGMBO'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/demographic_max-hAAcU1285MnY'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/demographic-ZRYbxoMbYuAF'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/trait-XQsTSz6jTYld'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/area-2YS9I22BSOkJ'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/cohortData-YwSHneWNLLhV'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/log-JdOulww16v2T'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/brain_visualization-fGKJgbXWSp72'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/p_value-C2QQwRiykfGk'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61/instantiations/-6pdbgyBxBb9w/bindings/scatter-WRjNugK2Elm8'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 
'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/snp-twyafQVQOisd'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/trait-8fTONXjWs816'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/demographic-PxoiGF4m1BYo'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/demographic_min-EEskA1oaG2FT'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/area-MXF48etJAwrF'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/demographic_value-8MQ2G1piVSqI'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/demographic_max-LD5P9UdfgzIC'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/cohortData-dyhj26G7LGcN'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/log-A5pG7zRUGypq'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/scatter-v6VSuxkq7e2E'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 
'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/brain_visualization-Wd0GZmyyN64v'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez/instantiations/-Y9pKl4PTjzlu/bindings/p_value-hQjTZmpKxm5h'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/demographic-jcEs0eT7hHi8'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/demographic_min-x2x5Qi2lBLHT'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/demographic_max-vrxOL5LYR53V'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/trait-aGXpFyovrwCy'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/demographic_value-RHIelBfw0XWk'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/area-v5vecFDgrvz0'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/snp-uvmZzoIsJAOf'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/cohortData-4MHCbnj5G1Ji'}] [{'relationship': ({}, 'hasInput', {})}] [{'b.id': 
'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/scatter-jWqHdG8hgfq1'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/log-tsEeO9iqTqm2'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/p_value-oSOCMfJRtaij'}] [{'relationship': ({}, 'hasOutput', {})}] [{'b.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC/instantiations/-iw3phUA6ox6U/bindings/brain_visualization-K4qh4r4ROTdr'}] [{'relationship': ({}, 'hasOutput', {})}]
# Refresh the schema held by the Neo4j wrapper and print it.
neo4j.refresh_schema()
print(neo4j.schema)
Node properties: Question {id: STRING, name: STRING, template: STRING} Goal {date_created: STRING, description: STRING, id: STRING, name: STRING} LineOfInquiry {name: STRING, date_created: STRING, description: STRING, id: STRING, query_template: STRING} TriggeredLineOfInquiry {fullText: STRING, textEmbedding: LIST, source: STRING, status: STRING, query_template: STRING, query: STRING, date_created: STRING, query_response: STRING, description: STRING, id: STRING, name: STRING} Execution {date_end: STRING, date_start: STRING, confidence_value: STRING, id: STRING, confidence_type: STRING} Binding {type: STRING, id: STRING, variable: STRING, value: STRING} WorkflowSeed {id: STRING} WorkflowInstantiation {name: STRING, description: STRING, id: STRING, workflow_link: STRING} Relationship properties: The relationships: (:Goal)-[:hasQuestion]->(:Question) (:Goal)-[:hasQuestionBinding]->(:Binding) (:LineOfInquiry)-[:hasQuestion]->(:Question) (:LineOfInquiry)-[:hasWorkflowSeed]->(:WorkflowSeed) (:TriggeredLineOfInquiry)-[:hasWorkflowInstantiation]->(:WorkflowInstantiation) (:TriggeredLineOfInquiry)-[:hasQuestion]->(:Question) (:TriggeredLineOfInquiry)-[:hasLineOfInquiry]->(:LineOfInquiry) (:TriggeredLineOfInquiry)-[:hasGoal]->(:Goal) (:Execution)-[:hasOutput]->(:Binding) (:Execution)-[:hasInput]->(:Binding) (:WorkflowSeed)-[:hasInput]->(:Binding) (:WorkflowSeed)-[:hasParameter]->(:Binding) (:WorkflowInstantiation)-[:hasExecution]->(:Execution)
# Plain-text glossary of the DISK concepts: Question Template, Goal, Line of
# Inquiry, Triggered Line of Inquiry and Execution.
# NOTE(review): the text contains typos ("follorwing", "is send"). They are
# left as-is here because this is a runtime string; fix them deliberately if
# this text is sent to the model.
TEXT_CONTEXT = """
[GENERAL CONTEXT]
A Question Template is a text representation of possible questions the DISK system is able to test.
Question Templates contains one or more Question Variables that are denoted by the prefix “?” (e.g ?Genotype is the Question Variable "Genotype").
Question Variables provide multiple options retrieved from the data source. Users can select option values to customize the Question Template.
A Goal is what a DISK user wants to test. Goals are identified by an ID and have Name and Description.
Goals follow a Question Template and provide values for all of its Question Variables.
A Line of Inquiry is how DISK will test a Question Template. Lines of inquiry are identified by ID and have the follorwing properties: Name, Description, Data Query Template and Workflow Seed.
Lines of Inquiry follow a Question Template and use Question Variable values to customize its Data Query Template and Workflow Seed.
When the DISK system finds a Goal and a Line of Inquiry that follows the same Question template, a Triggered Line of Inquiry is created.
A Triggered Line of Inquiry is identified by an ID, Data Query and Workflow Instantiation.
The Triggered Line of Inquiry Data Query is created by using the Goal Question Variable Values to customize the Line of Inquiry Data Query Template.
This data query is used to retrieve inputs and parameters to use on the Workflow Seed. When all parameters and inputs are set, a new Execution is send.
This data query is executed periodically and when new data is found a new Triggered Line of Inquiry is created.
An Execution is a workflow run. Uses the data gathered by the Triggered Line of Inquiry to customize the run of an experiment.
This experiment can return a confidence value and one or several output files.
"""
# Positional str.format templates for the text description of each node type.
# NOTE(review): the loops that build goal_text / loi_text / tloi_text use
# similar inline format strings rather than these constants.

# Goal: id, name, description, question template, question variable values.
GOAL_TEMPLATE = """
[GOAL]
ID: {}
Name: {}
Description: {}
Question Template: {}
Question Variable Values: {}
"""
# Line of Inquiry: id, name, description, question template, data query template.
LOI_TEMPLATE = """
[Line of Inquiry]
ID: {}
Name: {}
Description: {}
Question Template: {}
Data Query Template: {}
"""
# Triggered Line of Inquiry: twelve placeholders, in the order listed below
# (the confidence line takes two: the value and, in parentheses, a second field).
TLOI_TEMPLATE = """
[Triggered Line of Inquiry]
ID: {}
Goal ID: {}
Line of Inquiry ID: {}
Question Template: {}
Data Query: {}
Workflow Name: {}
Workflow Description: {}
Execution Date: {}
Execution confidence value: {} ({})
Execution Inputs: {}
Execution Outputs: {}
"""
# Full text for Goals: one text block per Goal with its question template and
# the values selected for the question variables.
# Relationship types must be written with a leading ":" in Cypher. Without it
# (`-[hasQuestion]->`) the name is only a variable and the pattern matches a
# relationship of ANY type. The Goal -> Binding relationship is
# `hasQuestionBinding` according to the graph schema.
goal_data = """
MATCH (g:Goal) -[:hasQuestion]-> (q:Question)
WITH COLLECT {MATCH (g:Goal) -[:hasQuestionBinding]-> (b:Binding)
RETURN apoc.text.replace(b.variable, ".*?/" , "") + " = " + b.value } as bindings, g, q
RETURN g.id, g.name, g.description, q.template, bindings
"""
goal_results = neo4j.query(goal_data)
goal_text = {}
for goal_row in goal_results:
    # Positional formatting relies on the row values keeping the RETURN order.
    goal_text[goal_row["g.id"]] = "[GOAL]\nID: {}\nName: {}\nDescription: {}\nQuestion Template: {}\nQuestion Variable Values: {}".format(*goal_row.values())
    #print(goal_text[goal_row["g.id"]].replace('http://localhost:8080/disk-project-server/goals/', ''))
# Full text for Lines of Inquiry: one text block per LOI with its question
# template and data query template.
# The relationship type needs the leading ":"; `-[hasQuestion]->` would only
# name a variable and match a relationship of any type.
loi_data = """
MATCH (loi:LineOfInquiry) -[:hasQuestion]-> (q:Question)
RETURN loi.id, loi.name, loi.description, q.template, loi.query_template
"""
loi_results = neo4j.query(loi_data)
loi_text = {}
for loi_row in loi_results:
    # Positional formatting relies on the row values keeping the RETURN order.
    loi_text[loi_row["loi.id"]] = "[Line of Inquiry]\nID: {}\nName: {}\nDescription: {}\nQuestion Template: {}\nData Query Template: {}".format(*loi_row.values())
    #print(loi_text[loi_row["loi.id"]])
# We create embeddings for executions. For ENIGMA every TLOI runs a single
# workflow with a single execution; this is a simplification of the DISK
# structure, where multiple workflows/executions are possible.
tloi_data = """
MATCH (tloi:TriggeredLineOfInquiry) -[:hasGoal]-> (g:Goal),
(tloi:TriggeredLineOfInquiry) -[:hasLineOfInquiry]-> (loi:LineOfInquiry),
(loi:LineOfInquiry) -[:hasQuestion]-> (q:Question),
(tloi:TriggeredLineOfInquiry) -[:hasWorkflowInstantiation]-> (inst:WorkflowInstantiation),
(inst:WorkflowInstantiation) -[:hasExecution]-> (exec:Execution)
WITH COLLECT {MATCH (exec:Execution) -[:hasInput]-> (ba:Binding)
RETURN apoc.text.replace(ba.variable, ".*?/" , "") + " = " + ba.value } as inputs,
COLLECT {MATCH (exec:Execution) -[:hasOutput]-> (bb:Binding)
RETURN apoc.text.replace(bb.variable, ".*?/" , "") + " = " + bb.value } as outputs,
tloi, g, loi, inst, exec, q
RETURN tloi.id, g.id, loi.id, q.template,tloi.query, inst.name, inst.description, exec.date_start, exec.confidence_value, exec.confidence_type, apoc.text.join(inputs, "\n - "), apoc.text.join(outputs, "\n - ")
"""
tloi_results = neo4j.query(tloi_data)
# One placeholder per returned column, in RETURN order.
tloi_layout = "[Triggered Line of Inquiry]\nID: {}\nGoal ID: {}\nLine of Inquiry ID: {}\nQuestion Template: {}\nData Query: {}\nWorkflow Name: {}\nWorkflow Description: {}\nExecution Date: {}\nExecution confidence value: {} ({})\nExecution Inputs: \n - {}\nExecution Outputs: \n - {}"
# Map every TLOI id to its rendered text description.
tloi_text = {
    record["tloi.id"]: tloi_layout.format(*record.values())
    for record in tloi_results
}
#for text in tloi_text.values():
#    print(text.replace('http://localhost:8080/disk-project-server/tlois/','').replace('http://localhost:8080/disk-project-server/goals/','').replace('http://localhost:8080/disk-project-server/lois/','').replace('http://localhost:8080/wings-portal/export/users/admin/Enigma/data/library.owl#',''))
# Write the generated text to Neo4j as the `fullText` property of each node.
# Each statement takes $id (node id) and $text (the text to store) and returns
# the stored value.

# Store the text on a TriggeredLineOfInquiry node.
add_text_tloi = """
MATCH (tloi:TriggeredLineOfInquiry {id: $id})
SET tloi.fullText = $text
RETURN tloi.fullText"""
# Store the text on a LineOfInquiry node.
add_text_loi = """
MATCH (loi:LineOfInquiry {id: $id})
SET loi.fullText = $text
RETURN loi.fullText"""
# Store the text on a Goal node.
add_text_goal = """
MATCH (goal:Goal {id: $id})
SET goal.fullText = $text
RETURN goal.fullText"""
# The loops that execute the statements above are currently commented out.
#for ID in goal_text.keys():
# print(neo4j.query(add_text_goal, params={'id':ID, 'text': goal_text[ID]}))
#for ID in loi_text.keys():
# print(neo4j.query(add_text_loi, params={'id':ID, 'text': loi_text[ID]}))
#for ID in tloi_text.keys():
# print(neo4j.query(add_text_tloi, params={'id':ID, 'text': tloi_text[ID]}))
# Delete all
#neo4j.query("""MATCH (n) OPTIONAL MATCH (n)-[r]-() WITH n,r LIMIT 50000 DELETE n,r RETURN count(n) as deletedNodesCount""")
# Create (if missing) one cosine-similarity vector index with 1536 dimensions
# on the `textEmbedding` property of each embedded node label.
# %-formatting is used because the Cypher text itself contains literal braces.
vector_index_statement = """
CREATE VECTOR INDEX `%s` IF NOT EXISTS
FOR (t:%s) ON (t.textEmbedding)
OPTIONS { indexConfig: {
`vector.dimensions`: 1536,
`vector.similarity_function`: 'cosine'
}}
"""
# (index name, node label) pairs, in creation order.
vector_index_targets = (
    ("tloi_text", "TriggeredLineOfInquiry"),
    ("loi_text", "LineOfInquiry"),
    ("goal_text", "Goal"),
)
for index_name, node_label in vector_index_targets:
    neo4j.query(vector_index_statement % (index_name, node_label))
[]
# List the indexes currently defined in the database.
neo4j.query("SHOW INDEXES")
[{'id': 21, 'name': 'goal_text', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'VECTOR', 'entityType': 'NODE', 'labelsOrTypes': ['Goal'], 'properties': ['textEmbedding'], 'indexProvider': 'vector-2.0', 'owningConstraint': None, 'lastRead': None, 'readCount': None}, {'id': 1, 'name': 'index_343aff4e', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'LOOKUP', 'entityType': 'NODE', 'labelsOrTypes': None, 'properties': None, 'indexProvider': 'token-lookup-1.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2024, 9, 25, 7, 11, 19, 167000000, tzinfo=<UTC>), 'readCount': 2300}, {'id': 2, 'name': 'index_f7700477', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'LOOKUP', 'entityType': 'RELATIONSHIP', 'labelsOrTypes': None, 'properties': None, 'indexProvider': 'token-lookup-1.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2024, 7, 11, 5, 15, 54, 824000000, tzinfo=<UTC>), 'readCount': 1}, {'id': 20, 'name': 'loi_text', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'VECTOR', 'entityType': 'NODE', 'labelsOrTypes': ['LineOfInquiry'], 'properties': ['textEmbedding'], 'indexProvider': 'vector-2.0', 'owningConstraint': None, 'lastRead': None, 'readCount': None}, {'id': 19, 'name': 'tloi_text', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'VECTOR', 'entityType': 'NODE', 'labelsOrTypes': ['TriggeredLineOfInquiry'], 'properties': ['textEmbedding'], 'indexProvider': 'vector-2.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2024, 8, 28, 4, 10, 41, 996000000, tzinfo=<UTC>), 'readCount': 8}, {'id': 15, 'name': 'unique_binding', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['Binding'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': 'unique_binding', 'lastRead': neo4j.time.DateTime(2024, 9, 25, 4, 46, 57, 268000000, tzinfo=<UTC>), 'readCount': 182321}, {'id': 11, 'name': 'unique_exec', 'state': 'ONLINE', 'populationPercent': 
100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['Execution'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': 'unique_exec', 'lastRead': neo4j.time.DateTime(2024, 9, 25, 4, 46, 57, 268000000, tzinfo=<UTC>), 'readCount': 686}, {'id': 5, 'name': 'unique_goal', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['Goal'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': 'unique_goal', 'lastRead': neo4j.time.DateTime(2024, 9, 25, 7, 24, 29, 931000000, tzinfo=<UTC>), 'readCount': 358}, {'id': 13, 'name': 'unique_inst', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['WorkflowConfiguration'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': 'unique_inst', 'lastRead': neo4j.time.DateTime(2024, 7, 12, 6, 11, 17, 400000000, tzinfo=<UTC>), 'readCount': 183}, {'id': 7, 'name': 'unique_loi', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['LineOfInquiry'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': 'unique_loi', 'lastRead': neo4j.time.DateTime(2024, 9, 25, 7, 25, 29, 26000000, tzinfo=<UTC>), 'readCount': 150}, {'id': 3, 'name': 'unique_question', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['Question'], 'properties': ['questionId'], 'indexProvider': 'range-1.0', 'owningConstraint': 'unique_question', 'lastRead': neo4j.time.DateTime(2024, 7, 11, 3, 37, 5, 228000000, tzinfo=<UTC>), 'readCount': 17}, {'id': 17, 'name': 'unique_seed', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['WorkflowSeed'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': 'unique_seed', 'lastRead': neo4j.time.DateTime(2024, 9, 25, 4, 46, 39, 666000000, tzinfo=<UTC>), 'readCount': 87469}, {'id': 9, 
'name': 'unique_tloi', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'RANGE', 'entityType': 'NODE', 'labelsOrTypes': ['TriggeredLineOfInquiry'], 'properties': ['id'], 'indexProvider': 'range-1.0', 'owningConstraint': 'unique_tloi', 'lastRead': neo4j.time.DateTime(2024, 9, 25, 7, 25, 50, 664000000, tzinfo=<UTC>), 'readCount': 500}]
# Return the ids of the TLOI nodes that do not have a textEmbedding yet.
neo4j.query("""
MATCH (tloi:TriggeredLineOfInquiry) WHERE tloi.textEmbedding IS NULL
return tloi.id
""")
[{'tloi.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ'}, {'tloi.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K'}, {'tloi.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3'}, {'tloi.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM'}, {'tloi.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61'}, {'tloi.id': 'http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez'}]
# Print the id of every TLOI that has a generated text description.
for tloi_id in tloi_text:
    print(tloi_id)
http://localhost:8080/disk-project-server/tlois/TriggeredLOI-fEdISYTbY6OC http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez http://localhost:8080/disk-project-server/tlois/TriggeredLOI-j5QRPbmS5u61 http://localhost:8080/disk-project-server/tlois/TriggeredLOI-SP3oHYmxkUrM http://localhost:8080/disk-project-server/tlois/TriggeredLOI-MA2p3owIlWh3 http://localhost:8080/disk-project-server/tlois/TriggeredLOI-E8PbUbCdZB4K http://localhost:8080/disk-project-server/tlois/TriggeredLOI-bfjxauj6EcYQ
#This deletes the embeddings
#neo4j.query("""
# MATCH (tloi:TriggeredLineOfInquiry) WHERE tloi.textEmbedding IS NOT NULL
# remove tloi.textEmbedding
# return tloi.id
# """)
# Cypher statement that embeds the stored fullText of one TLOI through
# apoc.ml.openai.embedding and saves the vector as `textEmbedding`.
# Nodes that already have an embedding are skipped by the WHERE clause.
add_embedings_tloi = """
MATCH (tloi:TriggeredLineOfInquiry {id:$id}) WHERE tloi.textEmbedding IS NULL
CALL apoc.ml.openai.embedding([tloi.fullText], $apiKey) yield embedding
CALL db.create.setNodeVectorProperty(tloi, "textEmbedding", embedding)
"""
for tloi_id in tloi_text:
    embedding_params = {"apiKey": OPENAI_API_KEY, "id": tloi_id}
    print(neo4j.query(add_embedings_tloi, params=embedding_params))
    # Pause between requests (presumably to stay under the API rate limit).
    sleep(2)
[] [] [] [] [] [] []
# Cypher statement that embeds the stored fullText of one Line of Inquiry
# through apoc.ml.openai.embedding and saves the vector as `textEmbedding`.
# Nodes that already have an embedding are skipped by the WHERE clause.
add_embedings_loi = """
MATCH (loi:LineOfInquiry {id:$id}) WHERE loi.textEmbedding IS NULL
CALL apoc.ml.openai.embedding([loi.fullText], $apiKey) yield embedding
CALL db.create.setNodeVectorProperty(loi, "textEmbedding", embedding)
"""
for loi_id in loi_text:
    embedding_params = {"apiKey": OPENAI_API_KEY, "id": loi_id}
    print(neo4j.query(add_embedings_loi, params=embedding_params))
    # Pause between requests (presumably to stay under the API rate limit).
    sleep(2)
[] []
# Cypher statement that embeds the stored fullText of one Goal through
# apoc.ml.openai.embedding and saves the vector as `textEmbedding`.
# Nodes that already have an embedding are skipped by the WHERE clause.
add_embedings_goal = """
MATCH (g:Goal {id:$id}) WHERE g.textEmbedding IS NULL
CALL apoc.ml.openai.embedding([g.fullText], $apiKey) yield embedding
CALL db.create.setNodeVectorProperty(g, "textEmbedding", embedding)
"""
for goal_id in goal_text:
    embedding_params = {"apiKey": OPENAI_API_KEY, "id": goal_id}
    print(neo4j.query(add_embedings_goal, params=embedding_params))
    # Pause between requests (presumably to stay under the API rate limit).
    sleep(2)
[] []
# Sanity check: fetch the embedding of one TLOI that has one...
eg = neo4j.query("""
MATCH (tloi:TriggeredLineOfInquiry)
WHERE tloi.textEmbedding IS NOT NULL
RETURN tloi.textEmbedding
LIMIT 1
"""
)
# ...and show its first ten components.
eg[0]['tloi.textEmbedding'][:10]
[-0.01246707420796156, 0.01806901954114437, 0.023506201803684235, -0.022146906703710556, -0.03287023678421974, 0.03575359284877777, -0.005955499596893787, 0.005241525825113058, -0.014540343545377254, -0.05266926810145378]
#More data for tlois:
#This query returns too much data; it exceeds the max tokens
#retrieval_query = """
# OPTIONAL MATCH (tloi:TriggeredLineOfInquiry) -[hasGoal]-> (g:Goal)
# OPTIONAL MATCH (tloi:TriggeredLineOfInquiry) -[hasLineOfInquiry]-> (l:LineOfInquiry)
# RETURN g.fullText + l.fullText as text, score, {} AS metadata
#"""
# More data for TLOIs: return the full text of the Goal linked to each
# retrieved TLOI instead of the TLOI text itself.
# Neo4jVector appends this fragment after its vector search, which yields
# `node` and `score`. The pattern therefore has to start from `node`; a fresh
# `(tloi:TriggeredLineOfInquiry)` variable would match every TLOI in the graph
# regardless of the search hit. The relationship type also needs the ":".
retrieval_query = """
OPTIONAL MATCH (node) -[:hasGoal]-> (g:Goal)
RETURN g.fullText as text, score, {} AS metadata
"""
# Sample question used in the experiments.
question = "What is the Triggered Line of Inquiry with the lower p-value?"
# Main retriever: vector store over the TriggeredLineOfInquiry nodes, using
# the existing `tloi_text` index and the `fullText` / `textEmbedding` properties.
tloi_store_options = {
    "url": NEO4J_URI,
    "username": NEO4J_USERNAME,
    "password": NEO4J_PASSWORD,
    "index_name": 'tloi_text',
    "node_label": "TriggeredLineOfInquiry",
    "text_node_properties": ["fullText"],
    "embedding_node_property": "textEmbedding",
    # Optional custom retrieval query, currently disabled:
    #"retrieval_query": retrieval_query,
}
neo4j_vector_store_tloi = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    **tloi_store_options,
)
tloi_retriever = neo4j_vector_store_tloi.as_retriever()
Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated procedure. ('db.create.setVectorProperty' has been replaced by 'db.create.setNodeVectorProperty')} {position: line: 1, column: 84, offset: 83} for query: "UNWIND $data AS row MATCH (n:`TriggeredLineOfInquiry`) WHERE elementId(n) = row.id CALL db.create.setVectorProperty(n, 'textEmbedding', row.embedding) YIELD node RETURN count(*)"
# Other retrievers: vector stores over the LineOfInquiry and Goal nodes.
# Both use the same connection settings and the same text/embedding properties.
shared_store_options = {
    "url": NEO4J_URI,
    "username": NEO4J_USERNAME,
    "password": NEO4J_PASSWORD,
    "text_node_properties": ["fullText"],
    "embedding_node_property": "textEmbedding",
}
neo4j_vector_store_loi = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    index_name='loi_text',
    node_label="LineOfInquiry",
    **shared_store_options,
)
neo4j_vector_store_goal = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    index_name='goal_text',
    node_label="Goal",
    **shared_store_options,
)
loi_retriever = neo4j_vector_store_loi.as_retriever()
goal_retriever = neo4j_vector_store_goal.as_retriever()
Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated procedure. ('db.create.setVectorProperty' has been replaced by 'db.create.setNodeVectorProperty')} {position: line: 1, column: 75, offset: 74} for query: "UNWIND $data AS row MATCH (n:`LineOfInquiry`) WHERE elementId(n) = row.id CALL db.create.setVectorProperty(n, 'textEmbedding', row.embedding) YIELD node RETURN count(*)" Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated procedure. ('db.create.setVectorProperty' has been replaced by 'db.create.setNodeVectorProperty')} {position: line: 1, column: 66, offset: 65} for query: "UNWIND $data AS row MATCH (n:`Goal`) WHERE elementId(n) = row.id CALL db.create.setVectorProperty(n, 'textEmbedding', row.embedding) YIELD node RETURN count(*)"
# First attempt: a "stuff" question-answering chain with sources that
# retrieves from the TLOI vector store only.
# NOTE(review): `chain` is rebound further down by create_retrieval_chain.
sources_llm = ChatOpenAI(temperature=0)
chain = RetrievalQAWithSourcesChain.from_chain_type(
    sources_llm,
    chain_type="stuff",
    retriever=tloi_retriever,
)
from langchain.retrievers import EnsembleRetriever

# Combine the TLOI, LOI and Goal retrievers with (almost) equal weights.
ensemble_retriever = EnsembleRetriever(
    retrievers=[
        tloi_retriever,
        loi_retriever,
        goal_retriever,
    ],
    weights=[0.34, 0.33, 0.33],
)
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Retrieval chain that answers questions from the documents returned by the
# ensemble (TLOI + LOI + Goal) retriever.
retriever = ensemble_retriever

# Pin temperature to 0, as the RetrievalQAWithSourcesChain setup already does:
# with the default sampling temperature, repeating the same question can give
# a different answer on every run, which makes the experiments hard to compare.
llm = ChatOpenAI(temperature=0)

# `{context}` is filled with the retrieved documents by the stuff-documents
# chain; `{input}` is the user question passed to `chain.invoke`.
system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentence maximum and keep the answer concise. "
    "Context: {context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)
# First probe: ask which TLOI has the lowest p-value.
resp = chain.invoke(
    {"input": "What is the Triggered Line of Inquiry with the lower p-value?"}
)
print(resp["answer"])
The Triggered Line of Inquiry with the lower p-value is: - ID: http://localhost:8080/disk-project-server/tlois/TriggeredLOI-oCyPWu4b8Rez - Execution confidence value: 0.37155457478486e0 (p-Value)
This is not the correct answer
# Ask for a plain summary of one specific TLOI.
resp = chain.invoke(
    {"input": "Give me a summary of TriggeredLOI-oCyPWu4b8Rez"}
)
print(resp["answer"])
The Triggered Line of Inquiry (TriggeredLOI-oCyPWu4b8Rez) is associated with the goal of running a meta-analysis for cohorts filtered by genetic ancestry. This line of inquiry focuses on investigating the effect size of a specific genetic variant (rs1080066) on the surface area of the Precentral Cortex for cohorts with European ancestry. The meta-regression analysis aims to determine the relationship between the effect size and the mean age of each cohort.
# Same TLOI, now also asking for details about the execution.
resp = chain.invoke(
    {
        "input": "Give me a summary of TriggeredLOI-oCyPWu4b8Rez, include information about the execution"
    }
)
print(resp["answer"])
Triggered Line of Inquiry TriggeredLOI-oCyPWu4b8Rez is associated with Goal-XBkQmDYmJAn0 and investigates the effect size of the genotype rs1080066 on the Surface Area trait for the Precentral Cortex among cohorts of European ancestry. The execution was conducted using the Meta-Regression workflow on multiple cohort datasets, resulting in a p-value of 0.0828042022392925, indicating a moderate level of statistical confidence in the analysis.
# Same TLOI again, asking for the execution details plus the input files.
resp = chain.invoke(
    {
        "input": "Give me a summary of TriggeredLOI-oCyPWu4b8Rez, include information about the execution and a list of the input files"
    }
)
print(resp["answer"])
TriggeredLOI-oCyPWu4b8Rez is associated with Goal-XBkQmDYmJAn0, focusing on the effect size of rs1080066 on the Precentral Cortex Surface Area for cohorts of European ancestry. The execution date was 1970-01-20 at 16:40:55-03, with a confidence value of 0.018198423339433e0 (p-Value). The input files used for this analysis were scatter-28vnxquffs6hvegarrkosp7f3.png, p_value-p59rlb9wz6dyw74xqovftx1l, brain_visualization-3hwj1p2ov0jfe132m063dxez5, and log-4ys4xb4vp4vvsfmfdzteb82kr.
Note: I asked about the same TLOI three times, but each answer gives different information. The files it lists as inputs are actually outputs...
Let's try creating TLOI nodes with less text and adding a retrieval query.
# Less data now: build one compact text summary per Triggered Line of Inquiry.
# The query returns, per TLOI, its goal, line of inquiry, question template,
# workflow name/description, execution confidence and the joined input bindings.
tloi_data_simpler = """
MATCH (tloi:TriggeredLineOfInquiry) -[:hasGoal]-> (g:Goal),
(tloi:TriggeredLineOfInquiry) -[:hasLineOfInquiry]-> (loi:LineOfInquiry),
(loi:LineOfInquiry) -[:hasQuestion]-> (q:Question),
(tloi:TriggeredLineOfInquiry) -[:hasWorkflowInstantiation]-> (inst:WorkflowInstantiation),
(inst:WorkflowInstantiation) -[:hasExecution]-> (exec:Execution)
WITH COLLECT {MATCH (exec:Execution) -[:hasInput]-> (ba:Binding)
RETURN apoc.text.replace(ba.variable, ".*?/" , "") + " = " + ba.value } as inputs,
tloi, g, loi, inst, exec, q
RETURN tloi.id, g.id, loi.id, q.template, inst.name, inst.description, exec.confidence_value, exec.confidence_type, apoc.text.join(inputs, "\n - ")
"""
tloi_results_2 = neo4j.query(tloi_data_simpler)

tloi_text_simpler = {}
for row in tloi_results_2:
    # The template is filled positionally, so this relies on the row's values
    # being in the same order as the RETURN clause above.
    summary = (
        "[Triggered Line of Inquiry]\n"
        "ID: {}\n"
        "Goal ID: {}\n"
        "Line of Inquiry ID: {}\n"
        "Question Template: {}\n"
        "Workflow Name: {}\n"
        "Workflow Description: {}\n"
        "Confidence value: {} ({})\n"
        "Inputs: \n - {}"
    ).format(*row.values())
    # Drop the server URL prefixes so only the short identifiers remain.
    for url_prefix in (
        'http://localhost:8080/disk-project-server/tlois/',
        'http://localhost:8080/disk-project-server/goals/',
        'http://localhost:8080/disk-project-server/lois/',
        'http://localhost:8080/wings-portal/export/users/admin/Enigma/data/library.owl#',
    ):
        summary = summary.replace(url_prefix, '')
    # The dict key keeps the full (un-shortened) TLOI id.
    tloi_text_simpler[row["tloi.id"]] = summary
    #print(summary)
# Store each simplified summary on its TLOI node as the `text` property and
# print what the database returns for it.
add_simpler_text_tloi = """
MATCH (tloi:TriggeredLineOfInquiry {id: $id})
SET tloi.text = $text
RETURN tloi.text"""
for ID, simpler_text in tloi_text_simpler.items():
    stored = neo4j.query(
        add_simpler_text_tloi,
        params={'id': ID, 'text': simpler_text},
    )
    print(stored)
[{'tloi.text': "[Triggered Line of Inquiry]\nID: TriggeredLOI-fEdISYTbY6OC\nGoal ID: Goal-XBkQmDYmJAn0\nLine of Inquiry ID: LOI-Q7zw0HsrUwwD\nQuestion Template: Is the effect size of ?Genotype on ?BrainImagingTrait of ?Region associated with ?DemographicAttribute for cohorts groups filtered by ?Criterion for ?Value\nWorkflow Name: Meta-Regression\nWorkflow Description: Meta regression is considered as an extended statistical model of meta analysis, regressing the effect size against variable(s) of interest to account for the systematic differences of the effect sizes being meta-analyzed\nConfidence value: 0.018198423339433e0 (p-Value)\nInputs: \n - cohortData = ['SHA47a5c5_ASRB_Significant_GWAS.csv','SHA4b15c5_FOR2107_Significant_GWAS.csv','SHA4b15c5_HUBIN_Significant_GWAS.csv','SHA4835c5_MCIC_Significant_GWAS.csv','SHA4825c5_MPRC_Significant_GWAS.csv','SHA4a65c5_PAFIP_Significant_GWAS.csv','SHA4865c5_TOP_Significant_GWAS.csv','SHA4335c5_UMCU_Significant_GWAS.csv','SHA4fe5c4_1000BRAINS_Significant_GWAS.csv','SHA4a85c5_ADNI2GO_Significant_GWAS.csv','SHA4c45c5_BONN_Significant_GWAS.csv','SHA4ad5c5_BrainScale_Significant_GWAS.csv','SHA4a75c5_DNS-V3_Significant_GWAS.csv','SHA4935c5_GSP_Significant_GWAS.csv','SHA4925c5_HUNT_Significant_GWAS.csv','SHA4b85c5_IMAGEN_Significant_GWAS.csv','SHA4bf5c5_ImpACT_Significant_GWAS.csv','SHA4b45c5_LBC1936_Significant_GWAS.csv','SHA48a5c5_LIBD_Significant_GWAS.csv','SHA4905c5_MooDS_Significant_GWAS.csv','SHA4945c5_MUNSTER_Significant_GWAS.csv','SHA4865c5_NCNG_Significant_GWAS.csv','SHA4835c5_NESDA_Significant_GWAS.csv','SHA4f55c5_NeuroIMAGE_Significant_GWAS.csv','SHA46d5c5_NTR_Significant_GWAS.csv','SHA4865c5_PDNZ_Significant_GWAS.csv','SHA47a5c5_PING_Significant_GWAS.csv','SHA3f15c5_PPMI_Significant_GWAS.csv','SHA4875c5_QTIM_Significant_GWAS.csv','SHA4bb5c5_SHIP_Significant_GWAS.csv','SHA4ee5c5_SHIP-Trend_Significant_GWAS.csv','SHA4a85c5_SYS_Significant_GWAS.csv','SHA4c15c5_TCD-NUIG_Significant_GWAS.csv','SHA4ae5c5_UiO2016_Significan
t_GWAS.csv','SHA3215e5_ABCD_Significant_GWAS.csv','SHA4815c5_ADNI1_Significant_GWAS.csv','SHA4bf5c6_ALSPACa_Significant_GWAS.csv','SHA4aa5c5_BETULA_Significant_GWAS.csv','SHA4945c5_BIG_Significant_GWAS.csv','SHA5305c4_BIG-PsychChip_Significant_GWAS.csv','SHA4ca5c5_CARDIFF_Significant_GWAS.csv','SHA4b15c5_DNS-V4_Significant_GWAS.csv','SHA4c65c5_EPIGEN_Significant_GWAS.csv','SHA4925c5_GIG_Significant_GWAS.csv','SHA4775c5_MPIP_Significant_GWAS.csv','SHA4765c5_OATS_Significant_GWAS.csv','SHA4ba5c5_SydneyMAS_Significant_GWAS.csv','SHA4945c5_TOP3T_Significant_GWAS.csv','SHA4aa5c5_UiO2017_Significant_GWAS.csv','SHA4c95c5_UKBB_Significant_GWAS.csv']\n - snp = rs1080066\n - area = PrecentralCortex\n - demographic_value = ['38.5','34.4','41.9','33.7','37.2','28.3','35.2','33.1','67.3','72.4','38.2','10','19.7','21.4','58.9','14.6','40.8','72.7','33.2','33.6','35.8','51.6','37.5','17.1','29.4','68.2','11.8','61.7','22.4','55.8','50.4','28.3','29.9','31.8','9.96','74.8','19.6','62.4','22.6','22.5','24.8','19.9','38.4','24.2','48.3','70.5','78.4','33.2','42.1','62.8']\n - trait = SA\n - demographic_max = 0\n - demographic_min = 0\n - demographic = HasAge Mean (E)"}] [{'tloi.text': "[Triggered Line of Inquiry]\nID: TriggeredLOI-oCyPWu4b8Rez\nGoal ID: Goal-XBkQmDYmJAn0\nLine of Inquiry ID: LOI-Q7zw0HsrUwwD\nQuestion Template: Is the effect size of ?Genotype on ?BrainImagingTrait of ?Region associated with ?DemographicAttribute for cohorts groups filtered by ?Criterion for ?Value\nWorkflow Name: Meta-Regression\nWorkflow Description: Meta regression is considered as an extended statistical model of meta analysis, regressing the effect size against variable(s) of interest to account for the systematic differences of the effect sizes being meta-analyzed\nConfidence value: 0.0828042022392925e0 (p-Value)\nInputs: \n - cohortData = 
['SHA47a5c5_ASRB_Significant_GWAS.csv','SHA4b15c5_FOR2107_Significant_GWAS.csv','SHA4b15c5_HUBIN_Significant_GWAS.csv','SHA4835c5_MCIC_Significant_GWAS.csv','SHA4825c5_MPRC_Significant_GWAS.csv','SHA4a65c5_PAFIP_Significant_GWAS.csv','SHA4865c5_TOP_Significant_GWAS.csv','SHA4335c5_UMCU_Significant_GWAS.csv','SHA4fe5c4_1000BRAINS_Significant_GWAS.csv','SHA4a85c5_ADNI2GO_Significant_GWAS.csv','SHA4c45c5_BONN_Significant_GWAS.csv','SHA4ad5c5_BrainScale_Significant_GWAS.csv','SHA4a75c5_DNS-V3_Significant_GWAS.csv','SHA4935c5_GSP_Significant_GWAS.csv','SHA4925c5_HUNT_Significant_GWAS.csv','SHA4b85c5_IMAGEN_Significant_GWAS.csv','SHA4bf5c5_ImpACT_Significant_GWAS.csv','SHA4b45c5_LBC1936_Significant_GWAS.csv','SHA48a5c5_LIBD_Significant_GWAS.csv','SHA4905c5_MooDS_Significant_GWAS.csv','SHA4945c5_MUNSTER_Significant_GWAS.csv','SHA4865c5_NCNG_Significant_GWAS.csv','SHA4835c5_NESDA_Significant_GWAS.csv','SHA4f55c5_NeuroIMAGE_Significant_GWAS.csv','SHA46d5c5_NTR_Significant_GWAS.csv','SHA4865c5_PDNZ_Significant_GWAS.csv','SHA47a5c5_PING_Significant_GWAS.csv','SHA3f15c5_PPMI_Significant_GWAS.csv','SHA4875c5_QTIM_Significant_GWAS.csv','SHA4bb5c5_SHIP_Significant_GWAS.csv','SHA4ee5c5_SHIP-Trend_Significant_GWAS.csv','SHA4a85c5_SYS_Significant_GWAS.csv','SHA4c15c5_TCD-NUIG_Significant_GWAS.csv','SHA4ae5c5_UiO2016_Significant_GWAS.csv','SHA4815c5_ADNI1_Significant_GWAS.csv','SHA4bf5c6_ALSPACa_Significant_GWAS.csv','SHA4aa5c5_BETULA_Significant_GWAS.csv','SHA4945c5_BIG_Significant_GWAS.csv','SHA5305c4_BIG-PsychChip_Significant_GWAS.csv','SHA4ca5c5_CARDIFF_Significant_GWAS.csv','SHA4b15c5_DNS-V4_Significant_GWAS.csv','SHA4c65c5_EPIGEN_Significant_GWAS.csv','SHA4925c5_GIG_Significant_GWAS.csv','SHA4775c5_MPIP_Significant_GWAS.csv','SHA4765c5_OATS_Significant_GWAS.csv','SHA4ba5c5_SydneyMAS_Significant_GWAS.csv','SHA4945c5_TOP3T_Significant_GWAS.csv','SHA4aa5c5_UiO2017_Significant_GWAS.csv','SHA4c95c5_UKBB_Significant_GWAS.csv']\n - demographic_max = 0\n - demographic_value = 
['38.5','34.4','41.9','33.7','37.2','28.3','35.2','33.1','67.3','72.4','38.2','10','19.7','21.4','58.9','14.6','40.8','72.7','33.2','33.6','35.8','51.6','37.5','17.1','29.4','68.2','11.8','61.7','22.4','55.8','50.4','28.3','29.9','31.8','74.8','19.6','62.4','22.6','22.5','24.8','19.9','38.4','24.2','48.3','70.5','78.4','33.2','42.1','62.8']\n - area = PrecentralCortex\n - demographic_min = 0\n - demographic = HasAge Mean (E)\n - trait = SA\n - snp = rs1080066"}] [{'tloi.text': "[Triggered Line of Inquiry]\nID: TriggeredLOI-j5QRPbmS5u61\nGoal ID: Goal-XBkQmDYmJAn0\nLine of Inquiry ID: LOI-Q7zw0HsrUwwD\nQuestion Template: Is the effect size of ?Genotype on ?BrainImagingTrait of ?Region associated with ?DemographicAttribute for cohorts groups filtered by ?Criterion for ?Value\nWorkflow Name: Meta-Regression\nWorkflow Description: Meta regression is considered as an extended statistical model of meta analysis, regressing the effect size against variable(s) of interest to account for the systematic differences of the effect sizes being meta-analyzed\nConfidence value: 0.020618776795934e0 (p-Value)\nInputs: \n - cohortData = 
['SHA47a5c5_ASRB_Significant_GWAS.csv','SHA4b15c5_FOR2107_Significant_GWAS.csv','SHA4b15c5_HUBIN_Significant_GWAS.csv','SHA4835c5_MCIC_Significant_GWAS.csv','SHA4825c5_MPRC_Significant_GWAS.csv','SHA4a65c5_PAFIP_Significant_GWAS.csv','SHA4865c5_TOP_Significant_GWAS.csv','SHA4335c5_UMCU_Significant_GWAS.csv','SHA4fe5c4_1000BRAINS_Significant_GWAS.csv','SHA4a85c5_ADNI2GO_Significant_GWAS.csv','SHA4c45c5_BONN_Significant_GWAS.csv','SHA4ad5c5_BrainScale_Significant_GWAS.csv','SHA4a75c5_DNS-V3_Significant_GWAS.csv','SHA4935c5_GSP_Significant_GWAS.csv','SHA4925c5_HUNT_Significant_GWAS.csv','SHA4b85c5_IMAGEN_Significant_GWAS.csv','SHA4bf5c5_ImpACT_Significant_GWAS.csv','SHA4b45c5_LBC1936_Significant_GWAS.csv','SHA48a5c5_LIBD_Significant_GWAS.csv','SHA4905c5_MooDS_Significant_GWAS.csv','SHA4945c5_MUNSTER_Significant_GWAS.csv','SHA4865c5_NCNG_Significant_GWAS.csv','SHA4835c5_NESDA_Significant_GWAS.csv','SHA4f55c5_NeuroIMAGE_Significant_GWAS.csv','SHA46d5c5_NTR_Significant_GWAS.csv','SHA4865c5_PDNZ_Significant_GWAS.csv','SHA47a5c5_PING_Significant_GWAS.csv','SHA3f15c5_PPMI_Significant_GWAS.csv','SHA4875c5_QTIM_Significant_GWAS.csv','SHA4bb5c5_SHIP_Significant_GWAS.csv','SHA4ee5c5_SHIP-Trend_Significant_GWAS.csv','SHA4a85c5_SYS_Significant_GWAS.csv','SHA4c15c5_TCD-NUIG_Significant_GWAS.csv','SHA4ae5c5_UiO2016_Significant_GWAS.csv','SHA4815c5_ADNI1_Significant_GWAS.csv','SHA4bf5c6_ALSPACa_Significant_GWAS.csv','SHA4aa5c5_BETULA_Significant_GWAS.csv','SHA4945c5_BIG_Significant_GWAS.csv','SHA5305c4_BIG-PsychChip_Significant_GWAS.csv','SHA4ca5c5_CARDIFF_Significant_GWAS.csv','SHA4b15c5_DNS-V4_Significant_GWAS.csv','SHA4c65c5_EPIGEN_Significant_GWAS.csv','SHA4925c5_GIG_Significant_GWAS.csv','SHA4775c5_MPIP_Significant_GWAS.csv','SHA4765c5_OATS_Significant_GWAS.csv','SHA4ba5c5_SydneyMAS_Significant_GWAS.csv','SHA4945c5_TOP3T_Significant_GWAS.csv','SHA4aa5c5_UiO2017_Significant_GWAS.csv']\n - area = PrecentralCortex\n - trait = SA\n - demographic = HasAge Mean (E)\n - 
demographic_max = 0\n - demographic_min = 0\n - snp = rs1080066\n - demographic_value = ['38.5','34.4','41.9','33.7','37.2','28.3','35.2','33.1','67.3','72.4','38.2','10','19.7','21.4','58.9','14.6','40.8','72.7','33.2','33.6','35.8','51.6','37.5','17.1','29.4','68.2','11.8','61.7','22.4','55.8','50.4','28.3','29.9','31.8','74.8','19.6','62.4','22.6','22.5','24.8','19.9','38.4','24.2','48.3','70.5','78.4','33.2','42.1']"}] [{'tloi.text': "[Triggered Line of Inquiry]\nID: TriggeredLOI-SP3oHYmxkUrM\nGoal ID: Goal-XBkQmDYmJAn0\nLine of Inquiry ID: LOI-Q7zw0HsrUwwD\nQuestion Template: Is the effect size of ?Genotype on ?BrainImagingTrait of ?Region associated with ?DemographicAttribute for cohorts groups filtered by ?Criterion for ?Value\nWorkflow Name: Meta-Regression\nWorkflow Description: Meta regression is considered as an extended statistical model of meta analysis, regressing the effect size against variable(s) of interest to account for the systematic differences of the effect sizes being meta-analyzed\nConfidence value: 0.500060579258888e0 (p-Value)\nInputs: \n - cohortData = 
['SHA47a5c5_ASRB_Significant_GWAS.csv','SHA4b15c5_FOR2107_Significant_GWAS.csv','SHA4b15c5_HUBIN_Significant_GWAS.csv','SHA4835c5_MCIC_Significant_GWAS.csv','SHA4825c5_MPRC_Significant_GWAS.csv','SHA4a65c5_PAFIP_Significant_GWAS.csv','SHA4865c5_TOP_Significant_GWAS.csv','SHA4335c5_UMCU_Significant_GWAS.csv','SHA4fe5c4_1000BRAINS_Significant_GWAS.csv','SHA4a85c5_ADNI2GO_Significant_GWAS.csv','SHA4c45c5_BONN_Significant_GWAS.csv','SHA4ad5c5_BrainScale_Significant_GWAS.csv','SHA4a75c5_DNS-V3_Significant_GWAS.csv','SHA4935c5_GSP_Significant_GWAS.csv','SHA4925c5_HUNT_Significant_GWAS.csv','SHA4b85c5_IMAGEN_Significant_GWAS.csv','SHA4bf5c5_ImpACT_Significant_GWAS.csv','SHA4b45c5_LBC1936_Significant_GWAS.csv','SHA48a5c5_LIBD_Significant_GWAS.csv','SHA4905c5_MooDS_Significant_GWAS.csv','SHA4945c5_MUNSTER_Significant_GWAS.csv','SHA4865c5_NCNG_Significant_GWAS.csv','SHA4835c5_NESDA_Significant_GWAS.csv','SHA4f55c5_NeuroIMAGE_Significant_GWAS.csv','SHA46d5c5_NTR_Significant_GWAS.csv','SHA4865c5_PDNZ_Significant_GWAS.csv','SHA47a5c5_PING_Significant_GWAS.csv','SHA3f15c5_PPMI_Significant_GWAS.csv','SHA4875c5_QTIM_Significant_GWAS.csv','SHA4bb5c5_SHIP_Significant_GWAS.csv','SHA4ee5c5_SHIP-Trend_Significant_GWAS.csv','SHA4a85c5_SYS_Significant_GWAS.csv','SHA4c15c5_TCD-NUIG_Significant_GWAS.csv','SHA4ae5c5_UiO2016_Significant_GWAS.csv','SHA4815c5_ADNI1_Significant_GWAS.csv','SHA4bf5c6_ALSPACa_Significant_GWAS.csv','SHA4aa5c5_BETULA_Significant_GWAS.csv','SHA4945c5_BIG_Significant_GWAS.csv','SHA5305c4_BIG-PsychChip_Significant_GWAS.csv','SHA4ca5c5_CARDIFF_Significant_GWAS.csv']\n - demographic_value = ['38.5','34.4','41.9','33.7','37.2','28.3','35.2','33.1','67.3','72.4','38.2','10','19.7','21.4','58.9','14.6','40.8','72.7','33.2','33.6','35.8','51.6','37.5','17.1','29.4','68.2','11.8','61.7','22.4','55.8','50.4','28.3','29.9','31.8','74.8','19.6','62.4','22.6','22.5','24.8']\n - trait = SA\n - demographic_min = 0\n - snp = rs1080066\n - area = PrecentralCortex\n - demographic_max 
= 0\n - demographic = HasAge Mean (E)"}] [{'tloi.text': "[Triggered Line of Inquiry]\nID: TriggeredLOI-MA2p3owIlWh3\nGoal ID: Goal-XBkQmDYmJAn0\nLine of Inquiry ID: LOI-Q7zw0HsrUwwD\nQuestion Template: Is the effect size of ?Genotype on ?BrainImagingTrait of ?Region associated with ?DemographicAttribute for cohorts groups filtered by ?Criterion for ?Value\nWorkflow Name: Meta-Regression\nWorkflow Description: Meta regression is considered as an extended statistical model of meta analysis, regressing the effect size against variable(s) of interest to account for the systematic differences of the effect sizes being meta-analyzed\nConfidence value: 0.37155457478486e0 (p-Value)\nInputs: \n - cohortData = ['SHA47a5c5_ASRB_Significant_GWAS.csv','SHA4b15c5_FOR2107_Significant_GWAS.csv','SHA4b15c5_HUBIN_Significant_GWAS.csv','SHA4835c5_MCIC_Significant_GWAS.csv','SHA4825c5_MPRC_Significant_GWAS.csv','SHA4a65c5_PAFIP_Significant_GWAS.csv','SHA4865c5_TOP_Significant_GWAS.csv','SHA4335c5_UMCU_Significant_GWAS.csv','SHA4fe5c4_1000BRAINS_Significant_GWAS.csv','SHA4a85c5_ADNI2GO_Significant_GWAS.csv','SHA4c45c5_BONN_Significant_GWAS.csv','SHA4ad5c5_BrainScale_Significant_GWAS.csv','SHA4a75c5_DNS-V3_Significant_GWAS.csv','SHA4935c5_GSP_Significant_GWAS.csv','SHA4925c5_HUNT_Significant_GWAS.csv','SHA4b85c5_IMAGEN_Significant_GWAS.csv','SHA4bf5c5_ImpACT_Significant_GWAS.csv','SHA4b45c5_LBC1936_Significant_GWAS.csv','SHA48a5c5_LIBD_Significant_GWAS.csv','SHA4905c5_MooDS_Significant_GWAS.csv','SHA4945c5_MUNSTER_Significant_GWAS.csv','SHA4865c5_NCNG_Significant_GWAS.csv','SHA4835c5_NESDA_Significant_GWAS.csv','SHA4f55c5_NeuroIMAGE_Significant_GWAS.csv','SHA46d5c5_NTR_Significant_GWAS.csv','SHA4865c5_PDNZ_Significant_GWAS.csv','SHA47a5c5_PING_Significant_GWAS.csv','SHA3f15c5_PPMI_Significant_GWAS.csv','SHA4875c5_QTIM_Significant_GWAS.csv','SHA4bb5c5_SHIP_Significant_GWAS.csv']\n - demographic_min = 0\n - area = PrecentralCortex\n - demographic_value = 
['38.5','34.4','41.9','33.7','37.2','28.3','35.2','33.1','67.3','72.4','38.2','10','19.7','21.4','58.9','14.6','40.8','72.7','33.2','33.6','35.8','51.6','37.5','17.1','29.4','68.2','11.8','61.7','22.4','55.8']\n - trait = SA\n - demographic = HasAge Mean (E)\n - demographic_max = 0\n - snp = rs1080066"}] [{'tloi.text': "[Triggered Line of Inquiry]\nID: TriggeredLOI-E8PbUbCdZB4K\nGoal ID: Goal-XBkQmDYmJAn0\nLine of Inquiry ID: LOI-Q7zw0HsrUwwD\nQuestion Template: Is the effect size of ?Genotype on ?BrainImagingTrait of ?Region associated with ?DemographicAttribute for cohorts groups filtered by ?Criterion for ?Value\nWorkflow Name: Meta-Regression\nWorkflow Description: Meta regression is considered as an extended statistical model of meta analysis, regressing the effect size against variable(s) of interest to account for the systematic differences of the effect sizes being meta-analyzed\nConfidence value: 0.044376539244712e0 (p-Value)\nInputs: \n - cohortData = ['SHA47a5c5_ASRB_Significant_GWAS.csv','SHA4b15c5_FOR2107_Significant_GWAS.csv','SHA4b15c5_HUBIN_Significant_GWAS.csv','SHA4835c5_MCIC_Significant_GWAS.csv','SHA4825c5_MPRC_Significant_GWAS.csv','SHA4a65c5_PAFIP_Significant_GWAS.csv','SHA4865c5_TOP_Significant_GWAS.csv','SHA4335c5_UMCU_Significant_GWAS.csv','SHA4fe5c4_1000BRAINS_Significant_GWAS.csv','SHA4a85c5_ADNI2GO_Significant_GWAS.csv','SHA4c45c5_BONN_Significant_GWAS.csv','SHA4ad5c5_BrainScale_Significant_GWAS.csv','SHA4a75c5_DNS-V3_Significant_GWAS.csv','SHA4935c5_GSP_Significant_GWAS.csv','SHA4925c5_HUNT_Significant_GWAS.csv','SHA4b85c5_IMAGEN_Significant_GWAS.csv','SHA4bf5c5_ImpACT_Significant_GWAS.csv','SHA4b45c5_LBC1936_Significant_GWAS.csv','SHA48a5c5_LIBD_Significant_GWAS.csv','SHA4905c5_MooDS_Significant_GWAS.csv']\n - area = PrecentralCortex\n - demographic = HasAge Mean (E)\n - snp = rs1080066\n - demographic_max = 0\n - trait = SA\n - demographic_value = 
['38.5','34.4','41.9','33.7','37.2','28.3','35.2','33.1','67.3','72.4','38.2','10','19.7','21.4','58.9','14.6','40.8','72.7','33.2','33.6']\n - demographic_min = 0"}] [{'tloi.text': "[Triggered Line of Inquiry]\nID: TriggeredLOI-bfjxauj6EcYQ\nGoal ID: Goal-XBkQmDYmJAn0\nLine of Inquiry ID: LOI-Q7zw0HsrUwwD\nQuestion Template: Is the effect size of ?Genotype on ?BrainImagingTrait of ?Region associated with ?DemographicAttribute for cohorts groups filtered by ?Criterion for ?Value\nWorkflow Name: Meta-Regression\nWorkflow Description: Meta regression is considered as an extended statistical model of meta analysis, regressing the effect size against variable(s) of interest to account for the systematic differences of the effect sizes being meta-analyzed\nConfidence value: 0.263176970715283e0 (p-Value)\nInputs: \n - cohortData = ['SHA47a5c5_ASRB_Significant_GWAS.csv','SHA4b15c5_FOR2107_Significant_GWAS.csv','SHA4b15c5_HUBIN_Significant_GWAS.csv','SHA4835c5_MCIC_Significant_GWAS.csv','SHA4825c5_MPRC_Significant_GWAS.csv','SHA4a65c5_PAFIP_Significant_GWAS.csv','SHA4865c5_TOP_Significant_GWAS.csv','SHA4335c5_UMCU_Significant_GWAS.csv','SHA4fe5c4_1000BRAINS_Significant_GWAS.csv','SHA4a85c5_ADNI2GO_Significant_GWAS.csv']\n - demographic_max = 0\n - area = PrecentralCortex\n - demographic = HasAge Mean (E)\n - demographic_value = ['38.5','34.4','41.9','33.7','37.2','28.3','35.2','33.1','67.3','72.4']\n - trait = SA\n - demographic_min = 0\n - snp = rs1080066"}]
# Vector index over the `simpleTextEmbedding` property of TLOI nodes.
# NOTE(review): 1536 presumably matches the output size of the embedding model
# used below; confirm if the model changes.
create_tloi_simple_text_index = """
CREATE VECTOR INDEX `tloi_simple_text` IF NOT EXISTS
FOR (t:TriggeredLineOfInquiry) ON (t.simpleTextEmbedding)
OPTIONS { indexConfig: {
`vector.dimensions`: 1536,
`vector.similarity_function`: 'cosine'
}}
"""
neo4j.query(create_tloi_simple_text_index)
[]
# Compute an embedding for `tloi.text` inside the database (APOC OpenAI
# procedure) and store it as `simpleTextEmbedding`. Nodes that already have
# the property are skipped by the WHERE clause.
add_simpler_embedings_tloi = """
MATCH (tloi:TriggeredLineOfInquiry {id:$id}) WHERE tloi.simpleTextEmbedding IS NULL
CALL apoc.ml.openai.embedding([tloi.text], $apiKey) yield embedding
CALL db.create.setNodeVectorProperty(tloi, "simpleTextEmbedding", embedding)
"""
for ID in tloi_text_simpler:
    print(
        neo4j.query(
            add_simpler_embedings_tloi,
            params={"apiKey": OPENAI_API_KEY, 'id': ID},
        )
    )
    # Pause between requests (presumably to stay under the API rate limit).
    sleep(2)
[] [] [] [] [] [] []
# More data for TLOIs:
# Try again, returning only the goal description, as the LOI text is too long.
#
# Neo4jVector appends this fragment to its vector-index lookup, which yields
# `node` and `score`, so the pattern has to start from `node`. Matching an
# unbound `(tloi:TriggeredLineOfInquiry)` would pair every hit with every
# TLOI in the graph. The relationship type also needs the leading colon:
# `[hasGoal]` only names a relationship variable and matches any type.
retrieval_query = """
OPTIONAL MATCH (node) -[:hasGoal]-> (g:Goal)
RETURN g.fullText as text, score, {} AS metadata
"""
# Simpler retriever: vector store over the shortened `text` property of the
# TLOI nodes, with vectors kept in `simpleTextEmbedding`.
neo4j_vector_store_tloi_s = Neo4jVector.from_existing_graph(
    node_label="TriggeredLineOfInquiry",
    index_name='tloi_simple_text',
    text_node_properties=["text"],
    embedding_node_property="simpleTextEmbedding",
    embedding=OpenAIEmbeddings(),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    # Custom retrieval query is currently disabled:
    #retrieval_query=retrieval_query,
)

tloi_retriever_simpler = neo4j_vector_store_tloi_s.as_retriever()
Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated procedure. ('db.create.setVectorProperty' has been replaced by 'db.create.setNodeVectorProperty')} {position: line: 1, column: 84, offset: 83} for query: "UNWIND $data AS row MATCH (n:`TriggeredLineOfInquiry`) WHERE elementId(n) = row.id CALL db.create.setVectorProperty(n, 'simpleTextEmbedding', row.embedding) YIELD node RETURN count(*)"
# Retrieval chain over the simplified TLOI texts, with the general DISK
# context (TEXT_CONTEXT) prepended to the system prompt.
retriever = tloi_retriever_simpler

# Pin temperature to 0, as the RetrievalQAWithSourcesChain setup already does:
# with the default sampling temperature, repeating the same question can give
# a different answer on every run, which makes the experiments hard to compare.
llm = ChatOpenAI(temperature=0)

# `{context}` is filled with the retrieved documents by the stuff-documents
# chain; `{input}` is the user question passed to `chain.invoke`.
system_prompt = TEXT_CONTEXT + (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    #"Use three sentence maximum and keep the answer concise. "
    "Context: {context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)
# Let's try the same question against the simpler retriever:
resp = chain.invoke(
    {"input": "What is the Triggered Line of Inquiry with the lower p-value?"}
)
print(resp["answer"])
The Triggered Line of Inquiry with the lower p-value is the one with ID TriggeredLOI-j5QRPbmS5u61, which has a confidence value of 0.020618776795934e0 (p-Value).
Better, but still not correct: the minimum p-value is 0.018198423339433e0, for TriggeredLOI-fEdISYTbY6OC.
# Same question, phrased with "confidence value" instead of "p-value".
resp = chain.invoke(
    {"input": "What is the Triggered Line of Inquiry with the lower confidence value?"}
)
print(resp["answer"])
The Triggered Line of Inquiry with the lower confidence value is TriggeredLOI-j5QRPbmS5u61 with a confidence value of 0.020618776795934e0 (p-Value).
# Summary of a single TLOI, including its execution and input files.
resp = chain.invoke(
    {
        "input": "Give me a summary of TriggeredLOI-j5QRPbmS5u61, include information about the execution and input files used"
    }
)
print(resp["answer"])
TriggeredLOI-j5QRPbmS5u61 is associated with the Goal ID: Goal-XBkQmDYmJAn0 and Line of Inquiry ID: LOI-Q7zw0HsrUwwD. The question template for this triggered line of inquiry is "Is the effect size of ?Genotype on ?BrainImagingTrait of ?Region associated with ?DemographicAttribute for cohorts groups filtered by ?Criterion for ?Value". The workflow used is Meta-Regression, which involves regressing the effect size against variable(s) of interest to account for systematic differences in effect sizes being meta-analyzed. The confidence value for this triggered line of inquiry is 0.020618776795934e0 (p-Value). The inputs used for this execution include: - cohortData: A list of significant GWAS files from various cohorts - area: PrecentralCortex - trait: SA - demographic: HasAge Mean (E) - demographic_max: 0 - demographic_min: 0 - snp: rs1080066 - demographic_value: A list of demographic values These inputs were used to customize the data query template and workflow seed for the execution associated with TriggeredLOI-j5QRPbmS5u61.
# Ask the model to instantiate the question template with the TLOI's values.
resp = chain.invoke(
    {
        "input": "For TriggeredLOI-j5QRPbmS5u61, give me the question template with all variables replaced for values"
    }
)
print(resp["answer"])
The question template with all variables replaced for values in TriggeredLOI-j5QRPbmS5u61 is: "Is the effect size of rs1080066 on SA of PrecentralCortex associated with HasAge Mean (E) for cohorts groups filtered by 0 for ['38.5','34.4','41.9','33.7','37.2','28.3','35.2','33.1','67.3','72.4']"
Almost correct: it is not able to determine the ethnicity value because that information is in the question graph, which we have not loaded yet.
# Concatenate the general context with every Goal, LOI and TLOI text, one
# entry per line group, after stripping the server URL prefixes from each.
FULL_TEXT = '\n'.join([
    TEXT_CONTEXT,
    *(
        goal_text[key].replace('http://localhost:8080/disk-project-server/goals/', '')
        for key in goal_text
    ),
    *(
        loi_text[key].replace('http://localhost:8080/disk-project-server/lois/', '')
        for key in loi_text
    ),
    # TLOI texts also mention goals, LOIs and data-library items, so all four
    # prefixes are removed from them.
    *(
        tloi_text[key]
        .replace('http://localhost:8080/disk-project-server/tlois/', '')
        .replace('http://localhost:8080/wings-portal/export/users/admin/Enigma/data/library.owl#', '')
        .replace('http://localhost:8080/disk-project-server/goals/', '')
        .replace('http://localhost:8080/disk-project-server/lois/', '')
        for key in tloi_text
    ),
])
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Global constants
VECTOR_INDEX_NAME = 'disk_chunks'
VECTOR_NODE_LABEL = 'Chunk'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'

# Splitter configured with chunk_size 2000 and chunk_overlap 200, lengths
# measured with `len`; separators are treated as literal strings, not regexes.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)

# Split the combined text and show the first chunk.
text_chunks = text_splitter.split_text(FULL_TEXT)
text_chunks[0]
'[GENERAL CONTEXT]\nA Question Template is a text representation of possible questions the DISK system is able to test.\nQuestion Templates contains one or more Question Variables that are denoted by the prefix “?” (e.g ?Genotype is the Question Variable "Genotype").\nQuestion Variables provide multiple options retrieved from the data source. Users can select option values to customize the Question Template. \n\nA Goal is what a DISK user wants to test. Goals are identified by an ID and have Name and Description.\nGoals follow a Question Template and provide values for all of its Question Variables.\n\nA Line of Inquiry is how DISK will test a Question Template. Lines of inquiry are identified by ID and have the follorwing properties: Name, Description, Data Query Template and Workflow Seed.\nLines of Inquiry follow a Question Template and use Question Variable values to customize its Data Query Template and Workflow Seed.\n\nWhen the DISK system finds a Goal and a Line of Inquiry that follows the same Question template, a Triggered Line of Inquiry is created.\nA Triggered Line of Inquiry is identified by an ID, Data Query and Workflow Instantiation.\nThe Triggered Line of Inquiry Data Query is created by using the Goal Question Variable Values to customize the Line of Inquiry Data Query Template. \nThis data query is used to retrieve inputs and parameters to use on the Workflow Seed. When all parameters and inputs are set, a new Execution is send.\nThis data query is executed periodically and when new data is found a new Triggered Line of Inquiry is created.\n\nAn Execution is a workflow run. Uses the data gathered by the Triggered Line of Inquiry to customize the run of an experiment.\nThis experiment can return a confidence value and one or several output files.'
Idea: Generate only one node for all kinds of text info.