# HumanMine workflow using InterMineR:
#   1. find human genes associated with diabetes,
#   2. find genes from a saved list with Medium/High protein expression in
#      the pancreas,
#   3. intersect the two result sets,
#   4. look up GWAS results for the intersecting genes.

library(InterMineR)

# Connection ----

# List the available mines, then select HumanMine.
listMines()
humanMine <- listMines()["HumanMine"]
humanMine # print out the value to see what's inside

# Replace the placeholder with your own HumanMine API token.
im <- initInterMine(mine = humanMine, token = "YOUR TOKEN HERE")

# Query 1: genes associated with diabetes ----

query1Diabetes <- setQuery(
  # Here we're choosing which columns of data we'd like to see.
  select = c("Gene.primaryIdentifier", "Gene.symbol"),
  # Set the constraints. The first constraint is the first
  # path + operator + value, e.g. Gene.organism.name = Homo sapiens, and the
  # second constraint is the combination of the second
  # path + operator + value, e.g. Gene.diseases.name CONTAINS diabetes.
  where = setConstraints(
    paths = c("Gene.organism.name", "Gene.diseases.name"),
    operators = c("=", "CONTAINS"),
    values = list("Homo sapiens", "diabetes")
  )
)

query1DiabetesResults <- runQuery(im, query1Diabetes)

# Print the first few results to make sure they look like we'd expect.
head(query1DiabetesResults)

# Query 2: genes with Medium/High expression in the pancreas ----

# We don't want to see *all* genes and their expression.
# Let's narrow it down a little by constraining it to genes of interest.
#
# Each constraint is automatically given a code, allowing us to manipulate
# the logic for the constraints. Here the constraints get the codes
# A, B, C, D in order:
#   Code A: "Gene" should be "IN" the list named "PL_Pax6_Targets"
#   Code B: "Gene.proteinAtlasExpression.level" should be equal to "Medium"
#   Code C: "Gene.proteinAtlasExpression.level" should be equal to "High"
#   Code D: "Gene.proteinAtlasExpression.tissue.name" should be equal to
#           "Pancreas"
#
# Now, you might be thinking "how can the expression level be equal to both
# Medium AND High?" The answer is - it can't, but take a quick look at the
# constraintLogic set on the query below for an explanation.
query2UpInPancreasConstraint <- setConstraints(
  paths = c(
    "Gene",
    "Gene.proteinAtlasExpression.level",
    "Gene.proteinAtlasExpression.level",
    "Gene.proteinAtlasExpression.tissue.name"
  ),
  operators = c("IN", rep("=", 3)),
  values = list("PL_Pax6_Targets", "Medium", "High", "Pancreas")
)

# Create a new query.
query2UpInPancreas <- newQuery(
  # Choose which columns of data we'd like to see.
  view = c(
    "Gene.primaryIdentifier",
    "Gene.symbol",
    "Gene.proteinAtlasExpression.cellType",
    "Gene.proteinAtlasExpression.level",
    "Gene.proteinAtlasExpression.tissue.name"
  ),
  # Set the logic for constraints. This means our pancreas expression level
  # is EITHER Medium (B) or High (C), but not both.
  # --
  # Note: Constraint logic only needs to be set if you wish to use OR. All
  # other constraints have AND logic applied by default.
  constraintLogic = "A and (B or C) and D"
)

# Add the constraint to our expressed-in-pancreas query (previously we just
# _defined_ the constraint).
query2UpInPancreas$where <- query2UpInPancreasConstraint

# Now we have the query set up the way we want, let's actually *run* it!
query2UpInPancreasResults <- runQuery(im = im, qry = query2UpInPancreas)

# Show me the first few results please!
head(query2UpInPancreasResults)

# Intersection of query 1 and query 2 ----

# Extract the primaryIdentifier columns from query 1 (diabetes genes) and
# query 2 (upexpressed in pancreas).
primaryIdentifiers.diabetes <-
  query1DiabetesResults[["Gene.primaryIdentifier"]]
primaryIdentifiers.pancreas <-
  query2UpInPancreasResults[["Gene.primaryIdentifier"]]

# Find the intersection of the two lists of primary identifiers.
diabetesAndPancreasGenes <- intersect(
  primaryIdentifiers.diabetes,
  primaryIdentifiers.pancreas
)

# Show the results.
print(diabetesAndPancreasGenes)

# Query 3: GWAS results for the intersecting genes ----

# First, we set up the constraints. The last three constraints are the
# diabetesAndPancreasGenes result genes from our last query.
# NOTE: the three gene identifiers are hard-coded below rather than read
# from diabetesAndPancreasGenes; update them (and the constraintLogic) if the
# intersection printed above differs.
query3GWASConstraints <- setConstraints(
  paths = c(
    "GWAS.results.pValue",
    "GWAS.results.phenotype",
    # using rep so we don't have to type this three times...
    rep("GWAS.results.associatedGenes.primaryIdentifier", 3)
  ),
  operators = c("<=", "CONTAINS", rep("=", 3)),
  values = list(
    "1e-04",    # A
    "diabetes", # B
    "3172",     # C
    "6928",     # D
    "6934"      # E
  )
)

query3GWAS <- newQuery(
  # Quite a few columns this time!
  view = c(
    "GWAS.results.associatedGenes.primaryIdentifier",
    "GWAS.results.associatedGenes.symbol",
    "GWAS.results.associatedGenes.name",
    "GWAS.results.SNP.primaryIdentifier",
    "GWAS.results.pValue",
    "GWAS.results.phenotype",
    "GWAS.firstAuthor",
    "GWAS.name",
    "GWAS.publication.pubMedId",
    "GWAS.results.associatedGenes.organism.shortName"
  ),
  # Set the logic for constraints. Remember that we want our results to
  # include any one of the three genes we found in the list of
  # diabetes + pancreas genes, so we need to use some OR logic.
  constraintLogic = "A and B and (C or D or E)"
)

# Add the constraints to the query.
query3GWAS$where <- query3GWASConstraints

# Run the query and print the full result.
query3GWASResults <- runQuery(im, query3GWAS)
query3GWASResults

# Single-bracket indexing keeps a one-column data frame, so unique() returns
# the distinct gene symbols as rows of a data frame.
GWASIds <- query3GWASResults["GWAS.results.associatedGenes.symbol"]
unique(GWASIds)