from collections import Counter
import sys
sys.path.append('..')
from filter_clinvar_xml import filter_xml, pprint, iterate_cvs_from_xml
from clinvar_xml_io.clinvar_xml_io import *
Questions to address:
# July 2022 data
drug_xml = '/home/april/projects/opentargets/drug-response.xml.gz'
dataset = ClinVarDataset(drug_xml)

# Pretty-print only the first ClinVarSet (RCV + SCV) as a reference for the XML layout.
raw_cvs_xml = next(iter(iterate_cvs_from_xml(drug_xml)), None)
if raw_cvs_xml is not None:
    pprint(raw_cvs_xml)
<ClinVarSet ID="74627773"> <RecordStatus>current</RecordStatus> <Title>NM_000769.4(CYP2C19):c.-806C>A AND clopidogrel response - Dosage, Efficacy, Toxicity/ADR</Title> <ReferenceClinVarAssertion DateCreated="2016-05-18" DateLastUpdated="2021-09-29" ID="503964"> <ClinVarAccession Acc="RCV000211201" Version="2" Type="RCV" DateUpdated="2021-09-29" /> <RecordStatus>current</RecordStatus> <ClinicalSignificance DateLastEvaluated="2016-06-14"> <ReviewStatus>reviewed by expert panel</ReviewStatus> <Description>drug response</Description> </ClinicalSignificance> <Assertion Type="variation to disease" /> <ObservedIn> <Sample> <Origin>germline</Origin> <Species TaxonomyId="9606">human</Species> <AffectedStatus>yes</AffectedStatus> </Sample> <Method> <MethodType>curation</MethodType> </Method> <ObservedData ID="74435109"> <Attribute Type="Description">not provided</Attribute> </ObservedData> </ObservedIn> <MeasureSet Type="Variant" ID="225946" Acc="VCV000225946" Version="3"> <Measure Type="single nucleotide variant" ID="227770"> <Name> <ElementValue Type="Preferred">NM_000769.4(CYP2C19):c.-806C>A</ElementValue> </Name> <Name> <ElementValue Type="Alternate">NM_000769.2(CYP2C19):c.-806C>A</ElementValue> </Name> <CanonicalSPDI>NC_000010.11:94761899:C:A</CanonicalSPDI> <AttributeSet> <Attribute Accession="NG_055436" Version="1" Change="g.1260C>A" Type="HGVS, genomic, RefSeqGene">NG_055436.1:g.1260C>A</Attribute> </AttributeSet> <AttributeSet> <Attribute Accession="NG_008384" Version="3" Change="g.4220C>A" Type="HGVS, genomic, RefSeqGene">NG_008384.3:g.4220C>A</Attribute> </AttributeSet> <AttributeSet> <Attribute Accession="NC_000010" Version="11" Change="g.94761900C>A" Type="HGVS, genomic, top level" integerValue="38">NC_000010.11:g.94761900C>A</Attribute> </AttributeSet> <AttributeSet> <Attribute Accession="NC_000010" Version="10" Change="g.96521657C>A" Type="HGVS, genomic, top level, previous" integerValue="37">NC_000010.10:g.96521657C>A</Attribute> </AttributeSet> 
<GlobalMinorAlleleFrequency Value="0.15316" Source="1000 Genomes Project" MinorAllele="T" /> <CytogeneticLocation>10q23.33</CytogeneticLocation> <SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="10" Accession="NC_000010.11" start="94761900" stop="94761900" display_start="94761900" display_stop="94761900" variantLength="1" positionVCF="94761900" referenceAlleleVCF="C" alternateAlleleVCF="A" /> <SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="10" Accession="NC_000010.10" start="96521657" stop="96521657" display_start="96521657" display_stop="96521657" variantLength="1" positionVCF="96521657" referenceAlleleVCF="C" alternateAlleleVCF="A" /> <MeasureRelationship Type="near gene, upstream"> <Name> <ElementValue Type="Preferred">cytochrome P450 family 2 subfamily C member 19</ElementValue> </Name> <Symbol> <ElementValue Type="Preferred">CYP2C19</ElementValue> </Symbol> <SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="10" Accession="NC_000010.11" start="94762681" stop="94855547" display_start="94762681" display_stop="94855547" Strand="+" /> <SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="10" Accession="NC_000010.10" start="96522462" stop="96612670" display_start="96522462" display_stop="96612670" variantLength="90209" Strand="+" /> <XRef ID="1557" DB="Gene" /> <XRef Type="MIM" ID="124020" DB="OMIM" /> <XRef ID="HGNC:2621" DB="HGNC" /> </MeasureRelationship> <MeasureRelationship Type="within single gene"> <Name> <ElementValue Type="Preferred">CYP2C19 promoter</ElementValue> </Name> <Symbol> <ElementValue Type="Preferred">LOC110599570</ElementValue> </Symbol> <SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="10" Accession="NC_000010.11" start="94760741" stop="94762704" 
display_start="94760741" display_stop="94762704" Strand="+" /> <XRef ID="110599570" DB="Gene" /> </MeasureRelationship> <Citation Type="practice guideline" Abbrev="CPIC, 2011"> <ID Source="PubMed">21716271</ID> <ID Source="pmc">3234301</ID> </Citation> <XRef ID="1043859080" DB="PharmGKB" /> <XRef ID="1043859080PA10074" DB="PharmGKB" /> <XRef ID="1043859080PA449015" DB="PharmGKB" /> <XRef ID="655386913" DB="PharmGKB" /> <XRef ID="655386913PA449053" DB="PharmGKB" /> <XRef ID="1043859080" DB="PharmGKB Clinical Annotation" /> <XRef ID="655386913" DB="PharmGKB Clinical Annotation" /> <XRef Type="rs" ID="12248560" DB="dbSNP" /> </Measure> <Name> <ElementValue Type="Preferred">NM_000769.4(CYP2C19):c.-806C>A</ElementValue> </Name> <Name> <ElementValue Type="Preferred">NM_000769.4(CYP2C19):c.-806C>A</ElementValue> </Name> <Name> <ElementValue Type="Preferred">NM_000769.4(CYP2C19):c.-806C>A</ElementValue> </Name> <XRef ID="CA10576167" DB="ClinGen" /> </MeasureSet> <TraitSet Type="DrugResponse" ID="26824"> <Trait ID="35423" Type="DrugResponse"> <Name> <ElementValue Type="Preferred">clopidogrel response - Dosage, Efficacy, Toxicity/ADR</ElementValue> </Name> <XRef ID="CN236507" DB="MedGen" /> </Trait> </TraitSet> </ReferenceClinVarAssertion> <ClinVarAssertion ID="503646" SubmissionName="PharmGKB-ClinVar 2018-05"> <ClinVarSubmissionID localKey="655386913PA449053|clopidogrel response - Dosage, Efficacy, Toxicity/ADR" submittedAssembly="GRCh38" submitter="PharmGKB" submitterDate="2018-06-18" /> <ClinVarAccession Acc="SCV000268179" Version="3" Type="SCV" OrgID="500295" OrganizationCategory="resource" OrgType="primary" DateUpdated="2021-09-29" /> <RecordStatus>current</RecordStatus> <ClinicalSignificance DateLastEvaluated="2016-06-14"> <ReviewStatus>reviewed by expert panel</ReviewStatus> <Description>drug response</Description> <Citation> <ID Source="PubMed">19463375</ID> </Citation> <Citation> <ID Source="PubMed">20083681</ID> </Citation> <Citation> <ID 
Source="PubMed">20492469</ID> </Citation> <Citation> <ID Source="PubMed">20801498</ID> </Citation> <Citation> <ID Source="PubMed">20826260</ID> </Citation> <Citation> <ID Source="PubMed">21392617</ID> </Citation> <Citation> <ID Source="PubMed">22028352</ID> </Citation> <Citation> <ID Source="PubMed">22190063</ID> </Citation> <Citation> <ID Source="PubMed">22228204</ID> </Citation> <Citation> <ID Source="PubMed">22462746</ID> </Citation> <Citation> <ID Source="PubMed">22704413</ID> </Citation> <Citation> <ID Source="PubMed">22955794</ID> </Citation> <Citation> <ID Source="PubMed">22990067</ID> </Citation> <Citation> <ID Source="PubMed">23364775</ID> </Citation> <Citation> <ID Source="PubMed">23726091</ID> </Citation> <Citation> <ID Source="PubMed">23809542</ID> </Citation> <Citation> <ID Source="PubMed">23922007</ID> </Citation> <Citation> <ID Source="PubMed">24019397</ID> </Citation> <Comment>PharmGKB Level of Evidence 1A: Annotation for a variant-drug combination in a CPIC or medical society-endorsed PGx guideline, or implemented at a PGRN site or in another major health system.</Comment> </ClinicalSignificance> <Assertion Type="variation to disease" /> <ExternalID DB="Pharmacogenomics Knowledge Base" ID="655386913PA449053" /> <AttributeSet> <Attribute Type="AssertionMethod">Pharmacogenomics knowledge for personalized medicine</Attribute> <Citation> <ID Source="PubMed">22992668</ID> </Citation> </AttributeSet> <ObservedIn> <Sample> <Origin>germline</Origin> <Species TaxonomyId="9606">human</Species> <AffectedStatus>yes</AffectedStatus> </Sample> <Method> <MethodType>curation</MethodType> </Method> <ObservedData> <Attribute Type="Description">not provided</Attribute> </ObservedData> </ObservedIn> <MeasureSet Type="Variant"> <Measure Type="Variation"> <AttributeSet> <Attribute Type="HGVS">NC_000010.10:g.96521657C>A</Attribute> </AttributeSet> <MeasureRelationship Type="variant in gene"> <Symbol> <ElementValue Type="Preferred">CYP2C19</ElementValue> </Symbol> 
</MeasureRelationship> <XRef DB="dbSNP" ID="12248560" Type="rsNumber" /> </Measure> </MeasureSet> <TraitSet Type="DrugResponse"> <Trait Type="DrugResponse"> <Name> <ElementValue Type="Preferred">clopidogrel response - Dosage, Efficacy, Toxicity/ADR</ElementValue> </Name> <TraitRelationship Type="DrugResponseAndDisease"> <Name> <ElementValue Type="Preferred">Acute coronary syndrome</ElementValue> </Name> </TraitRelationship> <TraitRelationship Type="DrugResponseAndDisease"> <Name> <ElementValue Type="Preferred">Coronary Artery Disease</ElementValue> </Name> </TraitRelationship> <TraitRelationship Type="DrugResponseAndDisease"> <Name> <ElementValue Type="Preferred">Myocardial Infarction</ElementValue> </Name> </TraitRelationship> </Trait> </TraitSet> <Citation> <URL>https://www.pharmgkb.org/clinicalAnnotation/655386913</URL> </Citation> <Comment>Drug is not necessarily used to treat response condition</Comment> </ClinVarAssertion> </ClinVarSet>
Example: RCV000211201 contains a trait relationship between the drug and a disease, but only in the SCV record, not in the RCV record. (Note also that there is only one SCV for this RCV.)
SCV:
<TraitSet Type="DrugResponse">
<Trait Type="DrugResponse">
<Name>
<ElementValue Type="Preferred">clopidogrel response - Dosage, Efficacy, Toxicity/ADR</ElementValue>
</Name>
<TraitRelationship Type="DrugResponseAndDisease">
<Name>
<ElementValue Type="Preferred">Acute coronary syndrome</ElementValue>
</Name>
</TraitRelationship>
<TraitRelationship Type="DrugResponseAndDisease">
<Name>
<ElementValue Type="Preferred">Coronary Artery Disease</ElementValue>
</Name>
</TraitRelationship>
<TraitRelationship Type="DrugResponseAndDisease">
<Name>
<ElementValue Type="Preferred">Myocardial Infarction</ElementValue>
</Name>
</TraitRelationship>
</Trait>
</TraitSet>
RCV:
<TraitSet Type="DrugResponse" ID="26824">
<Trait ID="35423" Type="DrugResponse">
<Name>
<ElementValue Type="Preferred">clopidogrel response - Dosage, Efficacy, Toxicity/ADR</ElementValue>
</Name>
<XRef ID="CN236507" DB="MedGen" />
</Trait>
</TraitSet>
# Check whether any of the RCV records have this kind of information
for rcv in dataset:
    rcv_traits = rcv.trait_set

    # Records whose trait set holds more than one trait
    # (finding: no trait set with both a drug and a disease).
    if len(rcv_traits) > 1:
        print(rcv.accession)
        print([t.preferred_or_other_valid_name for t in rcv_traits])

    # Traits that carry a TraitRelationship child
    # (finding: no traits in RCV with a relationship element).
    for rcv_trait in rcv_traits:
        if find_elements(rcv_trait.trait_xml, './TraitRelationship'):
            print(rcv.accession)
            pprint(rcv_trait.trait_xml)
RCV001824998 ['Cabozantinib resistance', 'Entrectinib resistance', 'Larotrectinib resistance', 'Repotrectinib resistance', 'Selitrectinib resistance']
def get_name(x):
    """Return the preferred (or other valid) name of the trait-like XML element ``x``.

    The element is wrapped in a ``ClinVarTrait`` (constructed with ``None`` as its
    second argument) and its ``preferred_or_other_valid_name`` attribute is returned.
    """
    wrapped = ClinVarTrait(x, None)
    return wrapped.preferred_or_other_valid_name
def is_pgkb(raw_cvs_xml):
    """Return True if any ClinVarSubmissionID in the record has submitter 'PharmGKB'."""
    submission_ids = find_elements(raw_cvs_xml, './ClinVarAssertion/ClinVarSubmissionID')
    return any(sub.attrib.get('submitter') == 'PharmGKB' for sub in submission_ids)
# Check whether all the SCV records have this kind of information.
#
# For every ClinVarSet, look at each SCV trait of type DrugResponse and collect the
# names of its DrugResponseAndDisease relationships ("background traits").
# NOTE: count_all / count_pgkb are incremented once per matching trait, so a record
# with several matching SCV traits contributes more than once.
n = 0
count_all = 0
count_pgkb = 0
all_strs = set()
for raw_cvs_xml in iterate_cvs_from_xml(drug_xml):
    n += 1
    for e in find_elements(raw_cvs_xml, './ClinVarAssertion/TraitSet/Trait'):
        # .get() rather than ['Type'] so an element without a Type is skipped, not fatal.
        if e.attrib.get('Type') != 'DrugResponse':
            continue
        background_traits = [
            get_name(r)
            for r in find_elements(e, './TraitRelationship')
            if r.attrib.get('Type') == 'DrugResponseAndDisease'
        ]
        if not background_traits:
            continue
        count_all += 1
        # Resolve the trait name and the submitter check once and reuse both.
        name = get_name(e)
        from_pgkb = is_pgkb(raw_cvs_xml)
        if from_pgkb:
            count_pgkb += 1
        # A leading '*' marks entries from records that include a PharmGKB submission.
        prefix = '*' if from_pgkb else ''
        all_strs.add(f'{prefix}{name} => {background_traits}')
for s in all_strs:
    print(s)
*hmg coa reductase inhibitors response - Toxicity => ['statin-related myopathy'] *nicotine response - Toxicity => ['Tobacco Use Disorder'] *azathioprine response - Toxicity => ['Inflammatory Bowel Diseases', 'Myelosuppression'] Piroxicam response => ['Pain', 'Inflammation', 'Osteoarthritis', 'Rheumatoid arthritis'] *halothane response - Toxicity => ['Malignant Hyperthermia'] *warfarin response - Toxicity/ADR => ['Over-anticoagulation'] *efavirenz response - Metabolism/PK => ['HIV Infections'] Prednisolone response => ['Minimal change disease'] efavirenz response => ['HIV'] Deutetrabenazine response => ['Chorea', 'Huntington disease', 'Tardive dyskinesia'] Lesinurad response => ['Gout'] *rosuvastatin response - Efficacy => ['Hypercholesterolemia', 'Myocardial Infarction'] Dabrafenib response => ['Pancreatic Adenocarcinoma'] *tobramycin response - Toxicity => ['Ototoxicity'] *peginterferon alfa-2b and ribavirin response - Toxicity => ['Anemia', 'Hepatitis C, Chronic'] *captopril response - Efficacy => ['Diabetes Mellitus, Type 2', 'Heart Failure', 'Pulmonary Disease, Chronic Obstructive'] Everolimus response => [None] Dopamine agonist response => ['Macroprolactinoma'] Imatinib response => [None] Corticosteroid response => ['Chronic kidney disease'] *Platinum compounds response - Efficacy => ['Neoplasms'] *streptomycin response - Toxicity => ['Ototoxicity'] Warfarin response => ['hemorrhage'] *atorvastatin response - Toxicity => ['statin-related myopathy'] Anti-PDL1 response => ['Cancer'] *simvastatin response - Toxicity => ['statin-related myopathy'] *gefitinib response - Efficacy => ['Carcinoma, Non-Small-Cell Lung', 'Drug Resistance'] *hydrochlorothiazide response - Efficacy => ['Essential hypertension', 'Hypertension'] *interferons, peginterferon alfa-2a, peginterferon alfa-2b and ribavirin response - Efficacy => ['Hepatitis C, Chronic'] *fluorouracil response - Toxicity => ['Neoplasms'] *desflurane response - Toxicity => ['Malignant Hyperthermia'] *methotrexate 
response - Metabolism/PK => ['Burkitt Lymphoma', 'Leukemia', 'Lymphoma', 'Lymphoma, T-Cell', 'Precursor Cell Lymphoblastic Leukemia-Lymphoma'] *nevirapine response - Toxicity => ['Epidermal Necrolysis, Toxic', 'Stevens-Johnson Syndrome'] Phenytoin response => ['status epilepticus'] Regorafenib response => ['Colorectal Neoplasms'] None => ['Non-small cell lung cancer'] *atorvastatin response - Efficacy => ['Coronary Disease', 'Hyperlipidemias'] *ivacaftor / lumacaftor response - Efficacy => ['Cystic Fibrosis'] Histone Methylation Therapy response => ['Cancer'] *peginterferon alfa-2a, peginterferon alfa-2b, ribavirin and telaprevir response - Efficacy => ['Hepatitis C, Chronic'] RAS Inhibitor response => ['Cancer'] *pravastatin response - Efficacy => ['Coronary Disease', 'Myocardial Infarction'] deoxygalactonojirimycin response => ['Fabry disease'] *methoxyflurane response - Toxicity => ['Malignant Hyperthermia'] *phenprocoumon response - Toxicity => ['Hemorrhage', 'over-anticoagulation', 'time above therapeutic range'] *efavirenz response - Toxicity => ['HIV Infections'] *tegafur response - Toxicity => ['Neoplasms'] MEK Inhibitor response => ['Cancer'] *ivacaftor / tezacaftor response - Efficacy => ['Cystic Fibrosis'] *enflurane response - Toxicity => ['Malignant Hyperthermia'] AKT1 Inhibitor response => ['Cancer'] *rosuvastatin response - Metabolism/PK => ['Hypercholesterolemia'] *methotrexate response - Toxicity => ['Arthritis, Juvenile Rheumatoid', 'Arthritis, Psoriatic', 'Arthritis, Rheumatoid', 'Drug Toxicity', 'Leukopenia', 'Neoplasms', 'Neutropenia', 'Osteosarcoma', 'Precursor Cell Lymphoblastic Leukemia-Lymphoma', 'Thrombocytopenia', 'Toxic liver disease', 'hematotoxicity', 'mucositis', 'primary central nervous system lymphoma'] *salmeterol response - Efficacy => ['Asthma'] *peginterferon alfa-2a, peginterferon alfa-2b and ribavirin response - Efficacy => ['Hepatitis C'] *acenocoumarol response - Dosage => ['Atrial Fibrillation'] Corticosteroid response => 
['Minimal Change disease'] Flurbiprofen response => ['Pain', 'Inflammation', 'Osteoarthritis', 'Rheumatoid Arthritis', 'Bursitis', 'Tendinitis'] WEE1 Inhibitor response => ['Cancer'] *peginterferon alfa-2b response - Efficacy => ['HIV Infections', 'Hepatitis C'] *ethanol response - Toxicity => ['Alcoholism'] *etanercept response - Efficacy => ['Arthritis, Psoriatic', 'Arthritis, Rheumatoid', 'Crohn Disease', 'Inflammation', 'Psoriasis', 'Spondylitis, Ankylosing'] *carbamazepine response - Dosage => ['Epilepsy'] *boceprevir, peginterferon alfa-2a, peginterferon alfa-2b and ribavirin response - Efficacy => ['Hepatitis C, Chronic'] *nevirapine response - Metabolism/PK => ['HIV Infections'] PARP Inhibitor response => ['Cancer'] *warfarin response - Toxicity => ['Hemorrhage', 'over-anticoagulation'] *capecitabine response - Toxicity => ['Neoplasms'] Azathioprine intolerance => ['myasthenia gravis'] Corticosteroid response => ['Minimal change disease'] mTOR Inhibitor response => ['Cancer'] *ribavirin response - Efficacy => ['HIV Infections', 'Hepatitis C'] Gentamicin response => ['Bacterial infection', 'Neonatal sepsis'] Androgen deprivation therapy response => ['Prostate neoplasm'] *succinylcholine response - Toxicity => ['Malignant Hyperthermia'] VEGF Inhibitors response => ['Cancer'] all trans retinoic acid (ATRA) response => ['Acute promyelocytic leukemia'] *tacrolimus response - Metabolism/PK => ['Kidney Transplantation', 'Proteinuria', 'liver transplantation'] Vemurafenib-Cobimetinib Response => ['Melanoma'] Corticosteroid response => ['Focal segmental glomerulosclerosis'] Trametinib-Dabrafenib Response => ['Melanoma'] *gentamicin response - Toxicity => ['Ototoxicity'] *aminoglycoside antibacterials response - Toxicity => ['Ototoxicity'] *clopidogrel response - Dosage, Efficacy, Toxicity/ADR => ['Acute coronary syndrome', 'Coronary Artery Disease', 'Myocardial Infarction'] Gemcitabine response => ['non-small cell lung cancer'] Corticosteroid response => ['Nephrotic 
syndrome'] *kanamycin response - Toxicity => ['Ototoxicity'] Pazopanib response => ['malignant granular cell tumor'] *ivacaftor response - Efficacy => ['Cystic Fibrosis'] *methotrexate response - Efficacy => ['Arthritis, Rheumatoid'] *erlotinib response - Efficacy => ['Adenocarcinoma', 'Carcinoma, Non-Small-Cell Lung', 'Drug Resistance', 'Lung Neoplasms'] *amikacin response - Toxicity => ['Ototoxicity'] *isoflurane response - Toxicity => ['Malignant Hyperthermia'] Gefitinib Response => ['Non-small cell lung carcinoma'] Erlotinib Response => ['Non-small cell lung carcinoma'] None => ['Leukemia', 'Inflammatory bowel disease', 'Rheumatoid arthritis', 'Non-Hodgkin lymphoma'] *gefitinib response - Efficacy => ['Carcinoma, Non-Small-Cell Lung'] *sevoflurane response - Toxicity => ['Malignant Hyperthermia'] Tamoxifen response => ['Breast cancer'] *irinotecan response - Toxicity => ['Neutropenia'] *peginterferon alfa-2a response - Efficacy => ['HIV Infections', 'Hepatitis C'] Doxorubicin response => [None] Prednisolone response => ['Focal segmental glomerulosclerosis 2'] Suxamethonium response - slow metabolism => ['Butyrylcholinesterase deficiency']
# Summary of the scan above. NOTE: count_all and count_pgkb are incremented once per
# matching DrugResponse trait, whereas n counts ClinVarSet records.
print(f'Out of {n} records, found {count_all} with drug response & disease relationship ({count_pgkb} from PharmGKB).')
Out of 4970 records, found 576 with drug response & disease relationship (361 from PharmGKB).
# Count SCV DrugResponse traits whose name mentions "efficacy" (case-insensitive),
# and how many of those sit in a record that includes a PharmGKB submission.
# n is recounted here so the summary does not rely on a value left over from an
# earlier cell.
n = 0
count_all = 0
count_pgkb = 0
for raw_cvs_xml in iterate_cvs_from_xml(drug_xml):
    n += 1
    for e in find_elements(raw_cvs_xml, './ClinVarAssertion/TraitSet/Trait'):
        # .get() rather than ['Type'] so an element without a Type is skipped, not fatal.
        if e.attrib.get('Type') != 'DrugResponse':
            continue
        name = get_name(e)
        # get_name can return None, hence the truthiness guard before .lower().
        if name and 'efficacy' in name.lower():
            count_all += 1
            if is_pgkb(raw_cvs_xml):
                count_pgkb += 1
print(f'Out of {n} records, found {count_all} with efficacy phenotype ({count_pgkb} from PharmGKB).')
Out of 4970 records, found 54 with efficacy phenotype (54 from PharmGKB).
General PharmGKB notes:
import pandas as pd
import os
from IPython.display import display
# None removes the column-width limit, so long annotation text is shown in full.
pd.set_option('display.max_colwidth', None)

pharmgkb_root = '/home/april/projects/opentargets/pharmgkb'


def _read_clinical_tsv(filename):
    """Load one tab-separated table from the 'clinical' directory under pharmgkb_root."""
    return pd.read_csv(os.path.join(pharmgkb_root, 'clinical', filename), sep='\t')


clinical_annotations = _read_clinical_tsv('clinical_annotations.tsv')
clinical_alleles = _read_clinical_tsv('clinical_ann_alleles.tsv')
clinical_evidence = _read_clinical_tsv('clinical_ann_evidence.tsv')

# Number of rows in the clinical annotations table.
len(clinical_annotations)
5013
def show_id(i):
    """Display the rows for clinical annotation ID ``i`` from each PharmGKB table.

    Filters ``clinical_annotations``, ``clinical_alleles`` and ``clinical_evidence``
    (in that order) on their 'Clinical Annotation ID' column and displays each result.
    """
    tables = (clinical_annotations, clinical_alleles, clinical_evidence)
    # Build all three selections before displaying anything.
    selections = [table[table['Clinical Annotation ID'] == i] for table in tables]
    for selection in selections:
        display(selection)
Two examples: one clinical annotation with an rsID (981755803) and one with star alleles only (1451243980).
# Example clinical annotation identified by an rsID.
show_id(981755803)
Clinical Annotation ID | Variant/Haplotypes | Gene | Level of Evidence | Level Override | Level Modifiers | Score | Phenotype Category | PMID Count | Evidence Count | Drug(s) | Phenotype(s) | Latest History Date (YYYY-MM-DD) | URL | Specialty Population | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 981755803 | rs75527207 | CFTR | 1A | NaN | Rare Variant; Tier 1 VIP | 234.875 | Efficacy | 28 | 30 | ivacaftor | Cystic Fibrosis | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/981755803 | Pediatric |
Clinical Annotation ID | Genotype/Allele | Annotation Text | Allele Function | |
---|---|---|---|---|
0 | 981755803 | AA | Patients with the rs75527207 AA genotype (two copies of the CFTR G551D variant) and cystic fibrosis may respond to ivacaftor treatment. FDA-approved drug labeling information and CPIC guidelines indicate use of ivacaftor in cystic fibrosis patients with at least one copy of a list of 33 CFTR genetic variants, including G551D. Other genetic and clinical factors may also influence response to ivacaftor. | NaN |
1 | 981755803 | AG | Patients with the rs75527207 AG genotype (one copy of the CFTR G551D variant) and cystic fibrosis may respond to ivacaftor treatment. FDA-approved drug labeling information and CPIC guidelines indicate use of ivacaftor in cystic fibrosis patients with at least one copy of a list of 33 CFTR genetic variants, including G551D. Other genetic and clinical factors may also influence response to ivacaftor. | NaN |
2 | 981755803 | GG | Patients with the rs75527207 GG genotype (do not have a copy of the CFTR G551D variant) and cystic fibrosis have an unknown response to ivacaftor treatment, as response may depend on the presence of other CFTR variants. FDA-approved drug labeling information and CPIC guidelines indicate use of ivacaftor in cystic fibrosis patients with at least one copy of a list of 33 CFTR genetic variants, including G551D. Other genetic and clinical factors may also influence response to ivacaftor. | NaN |
Clinical Annotation ID | Evidence ID | Evidence Type | Evidence URL | PMID | Summary | Score | |
---|---|---|---|---|---|---|---|
0 | 981755803 | PA166114461 | Guideline Annotation | https://www.pharmgkb.org/guidelineAnnotation/PA166114461 | NaN | Annotation of CPIC Guideline for ivacaftor and CFTR | 100 |
1 | 981755803 | PA166104890 | Label Annotation | https://www.pharmgkb.org/labelAnnotation/PA166104890 | NaN | Annotation of FDA Label for ivacaftor and CFTR | 100 |
2 | 981755803 | 981755665 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/981755665 | 21083385.0 | Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. | 0.25 |
3 | 981755803 | 981755678 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/981755678 | 22047557.0 | Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. | 2.0 |
4 | 981755803 | 982006840 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/982006840 | 23313410.0 | Allele A is associated with response to ivacaftor in men with Cystic Fibrosis. | 0.25 |
5 | 981755803 | 982009991 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/982009991 | 23590265.0 | Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. | 2.25 |
6 | 981755803 | 1043737597 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1043737597 | 23757359.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 2.0 |
7 | 981755803 | 1043737620 | Variant Functional Assay Annotation | https://www.pharmgkb.org/variantAnnotation/1043737620 | 23757361.0 | Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells. | 0.0 |
8 | 981755803 | 1043737636 | Variant Functional Assay Annotation | https://www.pharmgkb.org/variantAnnotation/1043737636 | 23891399.0 | Allele A is associated with activity of CFTR when treated with ivacaftor in FRT cell lines. | 0.0 |
9 | 981755803 | 1183629335 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1183629335 | 24066763.0 | Genotype AA is associated with response to ivacaftor in women with Cystic Fibrosis. | 0.25 |
10 | 981755803 | 1448267532 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1448267532 | 27745802.0 | Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG. | 1.5 |
11 | 981755803 | 1448423752 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1448423752 | 27773592.0 | Genotypes AA + AG is associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. | 0.875 |
12 | 981755803 | 1449191908 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449191908 | 25682022.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 0.25 |
13 | 981755803 | 1449192031 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1449192031 | 28651844.0 | Allele A is associated with decreased likelihood of cystic fibrosis pulmonary exacerbation when treated with ivacaftor in people with Cystic Fibrosis. | 3.0 |
14 | 981755803 | 1449192055 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449192055 | 28711222.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 2.25 |
15 | 981755803 | 1449192093 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449192093 | 25311995.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 0.0 |
16 | 981755803 | 1449192439 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449192439 | 28611235.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 1.5 |
17 | 981755803 | 1449192481 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449192481 | 26135562.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 2.0 |
18 | 981755803 | 1449192494 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449192494 | 25171465.0 | Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. | 0.25 |
19 | 981755803 | 1449192576 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449192576 | 25755212.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 2.0 |
20 | 981755803 | 1449192615 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449192615 | 26568242.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 2.5 |
21 | 981755803 | 1449192709 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449192709 | 25473543.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 0.25 |
22 | 981755803 | 1449192721 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1449192721 | 25145599.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 2.5 |
23 | 981755803 | 1450043422 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1450043422 | 23628510.0 | Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. | 3.0 |
24 | 981755803 | 1184512440 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1184512440 | 25049054.0 | Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | 1.5 |
25 | 981755803 | 981755746 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/981755746 | 22942289.0 | Allele A is associated with increased response to ivacaftor. | This annotation is not used for clinical annotation scoring. |
26 | 981755803 | 981755699 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/981755699 | 19846789.0 | Allele A is associated with increased response to ivacaftor. | This annotation is not used for clinical annotation scoring. |
27 | 981755803 | 981755787 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/981755787 | 22293084.0 | Allele A is associated with increased response to ivacaftor. | This annotation is not used for clinical annotation scoring. |
28 | 981755803 | 1446903789 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1446903789 | 24461666.0 | Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. | 2.5 |
29 | 981755803 | 1448099051 | Variant Drug Annotation | https://www.pharmgkb.org/variantAnnotation/1448099051 | 27158673.0 | Genotypes AA + AG are associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. | 2.0 |
# Example clinical annotation defined by star alleles only.
show_id(1451243980)
Clinical Annotation ID | Variant/Haplotypes | Gene | Level of Evidence | Level Override | Level Modifiers | Score | Phenotype Category | PMID Count | Evidence Count | Drug(s) | Phenotype(s) | Latest History Date (YYYY-MM-DD) | URL | Specialty Population | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4996 | 1451243980 | CYP2B6*1, CYP2B6*2, CYP2B6*6, CYP2B6*18, CYP2B6*38 | CYP2B6 | 1A | NaN | Tier 1 VIP | 211.5 | Toxicity | 12 | 14 | efavirenz | HIV Infections | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/1451243980 | NaN |
Clinical Annotation ID | Genotype/Allele | Annotation Text | Allele Function | |
---|---|---|---|---|
15404 | 1451243980 | *1 | The CYP2B6*1 allele is assigned as a normal function allele by CPIC. Patients carrying CYP2B6*1 allele in combination with another normal function allele may have decreased risk of adverse events (eg. liver toxicity or CNS side effects) when treated with efavirenz as compared to patients with a no or decreased function allele in combination with a normal or increased function allele or with two no or decreased function alleles. However, conflicting evidence has been reported. Other genetic and clinical factors may also influence the toxicity of efavirenz. | Normal function |
15405 | 1451243980 | *2 | The CYP2B6*2 allele is assigned as a normal function allele by CPIC. Patients carrying CYP2B6*2 allele in combination with another normal function allele may have decreased risk of adverse events (eg. liver toxicity or CNS side effects) when treated with efavirenz as compared to patients with a no or decreased function allele in combination with a normal or increased function allele or with two no or decreased function alleles. However, conflicting evidence has been reported. Other genetic and clinical factors may also influence the toxicity of efavirenz. | Normal function |
15406 | 1451243980 | *6 | The CYP2B6*6 allele is assigned as a decreased function allele by CPIC. Patients carrying the CYP2B6*6 allele in combination with a normal, decreased, no, or increased function allele may have increased risk of adverse events (eg. liver toxicity or CNS side effects) when treated with efavirenz as compared to patients with two normal function alleles. However, conflicting evidence has been reported. Other genetic and clinical factors may also influence toxicity of efavirenz. | Decreased function |
15407 | 1451243980 | *18 | The CYP2B6*18 allele is assigned as a no function allele by CPIC. Patients carrying the CYP2B6*18 allele in combination with a normal, decreased, no, or increased function allele may have increased risk of adverse events (eg. liver toxicity or CNS side effects) when treated with efavirenz as compared to patients with two normal function alleles. However, conflicting evidence has been reported. Other genetic and clinical factors may also influence toxicity of efavirenz. | No function |
15408 | 1451243980 | *38 | The CYP2B6*38 allele is assigned as a no function allele by CPIC. Patients carrying the CYP2B6*38 allele in combination with a normal, decreased, no, or increased function allele may have increased risk of adverse events (eg. liver toxicity or CNS side effects) when treated with efavirenz as compared to patients with two normal function alleles. Other genetic and clinical factors may also influence toxicity of efavirenz. | No function |
Clinical Annotation ID | Evidence ID | Evidence Type | Evidence URL | PMID | Summary | Score | |
---|---|---|---|---|---|---|---|
14695 | 1451243980 | PA166182603 | Guideline Annotation | https://www.pharmgkb.org/guidelineAnnotation/PA166182603 | NaN | Annotation of CPIC Guideline for efavirenz and CYP2B6 | 100 |
14696 | 1451243980 | PA166182846 | Guideline Annotation | https://www.pharmgkb.org/guidelineAnnotation/PA166182846 | NaN | Annotation of DPWG Guideline for efavirenz and CYP2B6 | 100 |
14697 | 1451243980 | 1451289240 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1451289240 | 25889207.0 | Allele C is not associated with increased likelihood of Central Nervous System Diseases when treated with efavirenz in people with HIV Infections as compared to allele T. | -1.5 |
14698 | 1451243980 | 1183634232 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1183634232 | 24080498.0 | Genotypes CC + CT are not associated with risk of Neurotoxicity Syndromes when treated with efavirenz in people with HIV Infections as compared to genotype TT. | -1.75 |
14699 | 1451243980 | 1184473287 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1184473287 | 24517233.0 | Genotype TT is associated with increased risk of Central Nervous System Diseases when treated with efavirenz in people with HIV Infections. | 2.0 |
14700 | 1451243980 | 1448636199 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1448636199 | 28692529.0 | Genotype CC is associated with decreased likelihood of Drug Toxicity when treated with efavirenz in people with HIV Infections as compared to genotype TT. | 2.0 |
14701 | 1451243980 | 1448993810 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1448993810 | 26715213.0 | Genotypes CC + CT are associated with decreased risk of Central Nervous System Diseases when treated with efavirenz in people with HIV Infections as compared to genotype TT. | 3.5 |
14702 | 1451243980 | 827707534 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/827707534 | 21862974.0 | CYP2B6 *6/*6 is associated with increased risk of drug-induced liver injury when treated with efavirenz in people with HIV as compared to CYP2B6 *1/*1. | 2.5 |
14703 | 1451243980 | 1184168515 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1184168515 | 23734829.0 | CYP2B6 *1 is not associated with Neurotoxicity Syndromes when treated with efavirenz in people with HIV as compared to CYP2B6 *6. | -1.5 |
14704 | 1451243980 | 1448993721 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1448993721 | 22808112.0 | CYP2B6 *6 is associated with increased risk of Toxic liver disease when treated with efavirenz in people with HIV as compared to CYP2B6 *1/*1. | 2.25 |
14705 | 1451243980 | 1448993746 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1448993746 | 27333947.0 | CYP2B6 *6/*6 is associated with increased risk of Long QT Syndrome when exposed to efavirenz in healthy individuals as compared to CYP2B6 *1/*1. | 1.75 |
14706 | 1451243980 | 1448994067 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1448994067 | 17686225.0 | CYP2B6 *2/*2 is associated with increased risk of Central Nervous System Diseases when treated with efavirenz in people with HIV as compared to CYP2B6 *1/*1. | 0.25 |
14707 | 1451243980 | 1449156721 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1449156721 | 23640958.0 | CYP2B6 *6 + *38 are associated with increased risk of Neurotoxicity Syndromes when treated with efavirenz as compared to CYP2B6 *1/*1. | 0.0 |
14708 | 1451243980 | 1449156770 | Variant Phenotype Annotation | https://www.pharmgkb.org/variantAnnotation/1449156770 | 24359841.0 | CYP2B6 *6/*6 is associated with increased likelihood of Toxic liver disease when treated with efavirenz in people with HIV as compared to CYP2B6 *1/*1. | 2.0 |
New data model extracted from the PharmGKB clinical annotations download file:
Efficacy
and has associated Phenotypes
# Show which columns the clinical annotations table provides
clinical_annotations.columns
Index(['Clinical Annotation ID', 'Variant/Haplotypes', 'Gene', 'Level of Evidence', 'Level Override', 'Level Modifiers', 'Score', 'Phenotype Category', 'PMID Count', 'Evidence Count', 'Drug(s)', 'Phenotype(s)', 'Latest History Date (YYYY-MM-DD)', 'URL', 'Specialty Population'], dtype='object')
# Restrict to annotations in the 'Efficacy' phenotype category and, in the
# same selection, keep only the columns that are used further down.
efficacy_columns = [
    'Clinical Annotation ID', 'Variant/Haplotypes', 'Gene',
    'Level of Evidence', 'Drug(s)', 'Phenotype(s)',
]
is_efficacy = clinical_annotations['Phenotype Category'] == 'Efficacy'
efficacy_annotations = clinical_annotations.loc[is_efficacy, efficacy_columns]
len(efficacy_annotations)
1931
# Attach the per-genotype/allele rows to each efficacy annotation, matching on
# the annotation ID (left join: every efficacy annotation is kept).
alleles_by_annotation = clinical_alleles.set_index('Clinical Annotation ID')
efficacy_by_annotation = efficacy_annotations.set_index('Clinical Annotation ID')
efficacy_with_alleles = efficacy_by_annotation.join(alleles_by_annotation, how='left')
efficacy_with_alleles
Variant/Haplotypes | Gene | Level of Evidence | Drug(s) | Phenotype(s) | Genotype/Allele | Annotation Text | Allele Function | |
---|---|---|---|---|---|---|---|---|
Clinical Annotation ID | ||||||||
613979021 | rs1042714 | ADRB2 | 3 | carvedilol | Heart Failure | CC | Patients with the CC genotype and heart failure may have a poorer response to carvedilol treatment as compared to patients with the CG or GG genotype. Other genetic and clinical factors may also influence a patient's chance of response. | NaN |
613979021 | rs1042714 | ADRB2 | 3 | carvedilol | Heart Failure | CG | Patients with the CG genotype and heart failure may have a poorer response to carvedilol treatment as compared to patients with the GG genotype and a better response as compared to patients with the CC genotype. Patients with the CG genotype may still be at risk for non-response to carvedilol treatment based on their genotype. Other genetic and clinical factors may also influence a patient's chance of response. | NaN |
613979021 | rs1042714 | ADRB2 | 3 | carvedilol | Heart Failure | GG | Patients with the GG genotype and heart failure may have a better response to carvedilol treatment as compared to patients with the CC or CG genotype. Patients with the GG genotype may still be at risk for non-response to carvedilol treatment based on their genotype. Other genetic and clinical factors may also influence a patient's chance of response. | NaN |
613979403 | rs5443 | GNB3 | 3 | sumatriptan | Cluster Headache | CC | Patients with the CC genotype and cluster headache who are treated with triptans may be less likely to have reduced pain or attack frequency as compared to patients with the CT genotype. Other genetic and clinical factors may also influence a patient's response to sumatriptan. | NaN |
613979403 | rs5443 | GNB3 | 3 | sumatriptan | Cluster Headache | CT | Patients with the CT genotype and cluster headache who are treated with triptans may be more likely to have reduced pain or attack frequency as compared to patients with the CC genotype. Other genetic and clinical factors may also influence a patient's response to sumatriptan. | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... |
1451868520 | rs11198893 | GRK5 | 3 | Beta Blocking Agents | Coronary Artery Disease | AG | Patients with the rs11198893 AG genotype and coronary artery disease may have decreased response when treated with beta blocking agents as compared to patients with the GG genotype. Other genetic and clinical factors may also influence response to beta blocking agents. | NaN |
1451868520 | rs11198893 | GRK5 | 3 | Beta Blocking Agents | Coronary Artery Disease | GG | Patients with the rs11198893 GG genotype and coronary artery disease may have increased response when treated with beta blocking agents as compared to patients with the AA or AG genotypes. Other genetic and clinical factors may also influence response to beta blocking agents. | NaN |
1451868540 | rs4752292 | GRK5 | 3 | Beta Blocking Agents | Coronary Artery Disease | GG | Patients with the rs4752292 GG genotype and coronary artery disease may have increased response when treated with beta blocking agents as compared to patients with the TT or GT genotypes. Other genetic and clinical factors may also influence response to beta blocking agents. | NaN |
1451868540 | rs4752292 | GRK5 | 3 | Beta Blocking Agents | Coronary Artery Disease | GT | Patients with the rs4752292 GT genotype and coronary artery disease may have decreased response when treated with beta blocking agents as compared to patients with the GG genotype. Other genetic and clinical factors may also influence response to beta blocking agents. | NaN |
1451868540 | rs4752292 | GRK5 | 3 | Beta Blocking Agents | Coronary Artery Disease | TT | Patients with the rs4752292 TT genotype and coronary artery disease may have decreased response when treated with beta blocking agents as compared to patients with the GG genotype. Other genetic and clinical factors may also influence response to beta blocking agents. | NaN |
5881 rows × 8 columns
# Row count after the join: one row per genotype/allele rather than per variant
efficacy_with_alleles.shape[0]
5881
# How many joined rows carry a non-null 'Allele Function' value
int(efficacy_with_alleles['Allele Function'].notna().sum())
126
# How many joined rows are identified by an rsID.
# NOTE(review): this is a plain substring test for 'rs', not an anchored
# rsID pattern - confirm no haplotype name contains 'rs' by coincidence.
mentions_rs = efficacy_with_alleles['Variant/Haplotypes'].str.contains('rs')
len(efficacy_with_alleles[mentions_rs])
5659
import re
# The PharmGKB Clinical Annotation ID should be recoverable from the xrefs on
# each ClinVar record; collect every ID found across the PharmGKB records.
# NOTE(review): block nesting was reconstructed from a flattened export -
# confirm against the original notebook cell.
all_pgkb_ids = []
for raw_cvs_xml in iterate_cvs_from_xml(drug_xml):
    if not is_pgkb(raw_cvs_xml):
        continue
    rcv_xml = find_mandatory_unique_element(raw_cvs_xml, 'ReferenceClinVarAssertion')
    record = ClinVarRecord(rcv_xml)
    if not record.measure:
        continue
    measure_xml = record.measure.measure_xml

    # Soundest source: dedicated clinical-annotation xrefs on the measure
    pgkb_ids = [
        int(xref.attrib['ID'])
        for xref in find_elements(measure_xml, './XRef[@DB="PharmGKB Clinical Annotation"]')
    ]
    if not pgkb_ids:
        # Fallback: generic PharmGKB xrefs, keeping whatever precedes the first
        # run of letters in the ID. This yields a lot of redundancy.
        pgkb_ids = [
            int(re.split(r'[a-zA-Z]+', xref.attrib['ID'])[0])
            for xref in find_elements(measure_xml, './XRef[@DB="PharmGKB"]')
        ]
    if not pgkb_ids:
        # Last resort: final path segment of the submitters' citation URLs.
        # Crude - probably not something to rely on.
        pgkb_ids = [
            int(url_elem.text.split('/')[-1])
            for url_elem in find_elements(raw_cvs_xml, './ClinVarAssertion/ClinicalSignificance/Citation/URL')
        ]
    if not pgkb_ids:
        # No strategy produced an ID: dump the record for inspection and stop
        pprint(raw_cvs_xml)
        break
    all_pgkb_ids.extend(pgkb_ids)
len(all_pgkb_ids)
2000
# Number of distinct annotation IDs (cf. 401 records with PGKB submissions)
distinct_pgkb_ids = set(all_pgkb_ids)
len(distinct_pgkb_ids)
167
# Look up the PharmGKB clinical annotations whose ID was found in ClinVar
found_in_clinvar = clinical_annotations['Clinical Annotation ID'].isin(set(all_pgkb_ids))
clinical_annotations[found_in_clinvar]
Clinical Annotation ID | Variant/Haplotypes | Gene | Level of Evidence | Level Override | Level Modifiers | Score | Phenotype Category | PMID Count | Evidence Count | Drug(s) | Phenotype(s) | Latest History Date (YYYY-MM-DD) | URL | Specialty Population | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 981755803 | rs75527207 | CFTR | 1A | NaN | Rare Variant; Tier 1 VIP | 234.875 | Efficacy | 28 | 30 | ivacaftor | Cystic Fibrosis | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/981755803 | Pediatric |
3 | 1449191690 | rs141033578 | CFTR | 1A | NaN | Rare Variant; Tier 1 VIP | 200.000 | Efficacy | 1 | 3 | ivacaftor | Cystic Fibrosis | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/1449191690 | NaN |
4 | 1449191746 | rs78769542 | CFTR | 1A | NaN | Rare Variant; Tier 1 VIP | 200.000 | Efficacy | 1 | 3 | ivacaftor | Cystic Fibrosis | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/1449191746 | NaN |
27 | 655386913 | CYP2C19*1, CYP2C19*17 | CYP2C19 | 3 | NaN | Tier 1 VIP | 6.000 | Toxicity | 15 | 16 | clopidogrel | Acute coronary syndrome;Coronary Artery Disease;Hemorrhage;Myocardial Infarction | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/655386913 | NaN |
159 | 981201854 | rs28399499 | CYP2B6 | 3 | NaN | Tier 1 VIP | 5.250 | Metabolism/PK | 7 | 7 | nevirapine | HIV Infections | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/981201854 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4531 | 1451237940 | rs9923231 | VKORC1 | 1A | NaN | Tier 1 VIP | 117.000 | Dosage | 10 | 11 | phenprocoumon | NaN | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/1451237940 | Pediatric |
4533 | 1451243676 | rs9923231 | VKORC1 | 2A | NaN | Tier 1 VIP | 8.250 | Toxicity | 3 | 4 | phenprocoumon | Hemorrhage;over-anticoagulation;time above therapeutic range | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/1451243676 | NaN |
4535 | 1451245360 | rs1051266 | SLC19A1 | 2A | NaN | Tier 1 VIP | 14.125 | Efficacy | 9 | 10 | methotrexate | Arthritis, Rheumatoid | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/1451245360 | NaN |
4762 | 1449191758 | rs75541969 | CFTR | 1A | NaN | Rare Variant; Tier 1 VIP | 200.000 | Efficacy | 1 | 3 | ivacaftor | Cystic Fibrosis | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/1449191758 | NaN |
5001 | 1451289660 | rs59086055 | DPYD | 1A | NaN | Rare Variant; Tier 1 VIP | 100.000 | Toxicity | 1 | 2 | fluorouracil | Neoplasms | 2021-03-24 | https://www.pharmgkb.org/clinicalAnnotation/1451289660 | NaN |
161 rows × 15 columns
e.g. CYP2D6 - a star allele corresponds to specific combinations of single nucleotide polymorphisms (SNPs) and/or small insertions and deletions (indels).... In addition, the CYP2D6 gene locus contains a number of complex structural variants including full gene deletions, gene duplications and multiplications [via]
`CYP2D6*1` is the reference allele; `CYP2D6*(gene variant)XN` refers to `N` copies of the gene.
Nomenclature is really heterogeneous, compare HLA - there are lots of rabbit holes we could go down!!
Conversion to rs / hgvs? e.g. in PharmVar
# Collect the variant/haplotype labels that do not mention an rsID, then show
# the distinct labels.
variant_labels = efficacy_with_alleles['Variant/Haplotypes']
no_rs = variant_labels[~variant_labels.str.contains('rs')].tolist()
set(no_rs)
{'CYP2B6*1, CYP2B6*4, CYP2B6*5, CYP2B6*6, CYP2B6*7', 'CYP2B6*1, CYP2B6*5', 'CYP2B6*1, CYP2B6*6', 'CYP2C19*1, CYP2C19*2', 'CYP2C19*1, CYP2C19*2, CYP2C19*3', 'CYP2C19*1, CYP2C19*2, CYP2C19*3, CYP2C19*17', 'CYP2C8*1, CYP2C8*2, CYP2C8*3, CYP2C8*4', 'CYP2C8*1, CYP2C8*3', 'CYP2C9*1, CYP2C9*2, CYP2C9*3', 'CYP2C9*1, CYP2C9*2, CYP2C9*3, CYP2C9*13, CYP2C9*14', 'CYP2C9*1, CYP2C9*3', 'CYP2D6*1, CYP2D6*10', 'CYP2D6*1, CYP2D6*1xN', 'CYP2D6*1, CYP2D6*1xN, CYP2D6*2, CYP2D6*2xN, CYP2D6*3, CYP2D6*4, CYP2D6*6', 'CYP2D6*1, CYP2D6*1xN, CYP2D6*2, CYP2D6*2xN, CYP2D6*4, CYP2D6*5, CYP2D6*10, CYP2D6*35xN', 'CYP2D6*1, CYP2D6*1xN, CYP2D6*2xN', 'CYP2D6*1, CYP2D6*2, CYP2D6*2xN, CYP2D6*3, CYP2D6*4, CYP2D6*6', 'CYP2D6*1, CYP2D6*2, CYP2D6*3, CYP2D6*4, CYP2D6*5, CYP2D6*6, CYP2D6*7, CYP2D6*9, CYP2D6*10, CYP2D6*10x2, CYP2D6*11, CYP2D6*17, CYP2D6*21, CYP2D6*36, CYP2D6*41', 'CYP2D6*1, CYP2D6*3, CYP2D6*4', 'CYP2D6*1, CYP2D6*3, CYP2D6*4, CYP2D6*5, CYP2D6*6, CYP2D6*10, CYP2D6*17', 'CYP2D6*1, CYP2D6*4', 'CYP2D6*1, CYP2D6*4, CYP2D6*5, CYP2D6*6, CYP2D6*17, CYP2D6*40', 'CYP2D6*5, CYP2D6*17', 'CYP3A4*1, CYP3A4*22', 'CYP3A4*1, CYP3A4*36', 'CYP3A4*1, CYP3A4*4', 'CYP3A5*1, CYP3A5*3', 'GSTM1 non-null, GSTM1 null', 'GSTT1 non-null, GSTT1 null', 'HLA-B*15:01:01:01', 'HLA-B*38:01:01', 'HLA-B*44:02:01:01', 'HLA-C*01:02:01, HLA-C*02:02:01, HLA-C*03:02, HLA-C*04:01:01:01, HLA-C*05:01:01:01, HLA-C*06:02:01:01, HLA-C*07:01:01, HLA-C*08:01, HLA-C*12:02:01, HLA-C*14:02:01, HLA-C*15:02:01, HLA-C*16:01:01, HLA-C*17:01:01:01', 'HLA-C*06:02:01:01', 'HLA-DRB1*04:01:01', 'NAT2*4, NAT2*5D, NAT2*6B, NAT2*7A, NAT2*12A, NAT2*13A, NAT2*14A', 'SLC6A4 HTTLPR long form (L allele), SLC6A4 HTTLPR short form (S allele)', 'SLCO1B1*1, SLCO1B1*14', 'TPMT*1, TPMT*3B, TPMT*3C', 'UGT1A1*1, UGT1A1*28', 'UGT1A1*60', 'UGT1A3*1, UGT1A3*2', 'UGT2B15*1, UGT2B15*2'}
import requests
def get_pharmvar_result(allele, timeout=30):
    """Query the PharmVar allele API and return the decoded JSON response.

    Args:
        allele: Allele name to look up, e.g. ``'CYP2C9*2'``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole notebook.

    Returns:
        The parsed JSON body. The HTTP status is deliberately not checked:
        PharmVar answers an unknown allele with a JSON error object
        (``errorMessage`` / ``errorCode``), and that object is returned
        as-is so callers can inspect it.

    Raises:
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    response = requests.get(
        f'https://www.pharmvar.org/api-service/alleles/{allele}',
        timeout=timeout,
    )
    return response.json()
# Example lookup for an allele PharmVar knows about
get_pharmvar_result('CYP2C9*2')
[{'geneSymbol': 'CYP2C9', 'alleleName': 'CYP2C9*2', 'pvId': 'PV00538', 'legacyLabel': None, 'coreAllele': None, 'evidenceLevel': '0', 'description': None, 'function': 'decreased function', 'activeInd': True, 'references': [{'citation': 'Rettie et al. 1994', 'url': 'http://www.ncbi.nlm.nih.gov/pubmed/8004131'}, {'citation': 'Crespi et al. 1997', 'url': 'http://www.ncbi.nlm.nih.gov/pubmed/9241660'}, {'citation': 'deposited by Gaedigk et al.', 'url': None}, {'citation': 'King et al. 2004', 'url': 'http://www.ncbi.nlm.nih.gov/pubmed/15608560'}, {'citation': 'Takahashi et al. 2004', 'url': 'http://www.ncbi.nlm.nih.gov/pubmed/15070684'}, {'citation': 'deposited by Campos et al.', 'url': None}], 'variants': [{'referenceSequence': 'NC_000010.11', 'referenceLocation': 'Sequence Start', 'referenceCollections': ['GRCh38'], 'hgvs': 'NC_000010.11:g.94942290C>T', 'rsId': 'rs1799853', 'impact': 'R144C', 'variantFrequency': [{'source': '1000Genomes', 'frequency': 0.047923}, {'source': 'GnomAD', 'frequency': 0.092016}], 'url': 'https://www.pharmvar.org/variant/29', 'variantId': '8', 'position': 'NC_000010.11:g.94942290C>T'}, {'referenceSequence': 'NC_000010.10', 'referenceLocation': 'Sequence Start', 'referenceCollections': ['GRCh37'], 'hgvs': 'NC_000010.10:g.96702047C>T', 'rsId': 'rs1799853', 'impact': 'R144C', 'variantFrequency': [{'source': '1000Genomes', 'frequency': 0.047923}, {'source': 'GnomAD', 'frequency': 0.092016}], 'url': 'https://www.pharmvar.org/variant/31', 'variantId': '8', 'position': 'NC_000010.10:g.96702047C>T'}, {'referenceSequence': 'NM_000771.4', 'referenceLocation': 'Sequence Start', 'referenceCollections': ['RefSeqTranscript'], 'hgvs': 'NM_000771.4:c.430C>T', 'rsId': 'rs1799853', 'impact': 'R144C', 'variantFrequency': [{'source': '1000Genomes', 'frequency': 0.047923}, {'source': 'GnomAD', 'frequency': 0.092016}], 'url': 'https://www.pharmvar.org/variant/13748', 'variantId': '8', 'position': 'NM_000771.4:c.455C>T'}, {'referenceSequence': 'NM_000771.4', 
'referenceLocation': 'ATG Start', 'referenceCollections': ['RefSeqTranscript'], 'hgvs': 'NM_000771.4:c.430C>T', 'rsId': 'rs1799853', 'impact': 'R144C', 'variantFrequency': [{'source': '1000Genomes', 'frequency': 0.047923}, {'source': 'GnomAD', 'frequency': 0.092016}], 'url': 'https://www.pharmvar.org/variant/13747', 'variantId': '8', 'position': 'NM_000771.4:c.430C>T'}, {'referenceSequence': 'NG_008385.2', 'referenceLocation': 'ATG Start', 'referenceCollections': ['RefSeqGene'], 'hgvs': 'NG_008385.2:g.9133C>T', 'rsId': 'rs1799853', 'impact': 'R144C', 'variantFrequency': [{'source': '1000Genomes', 'frequency': 0.047923}, {'source': 'GnomAD', 'frequency': 0.092016}], 'url': 'https://www.pharmvar.org/variant/13590', 'variantId': '8', 'position': 'NG_008385.2:g.3608C>T'}, {'referenceSequence': 'NG_008385.2', 'referenceLocation': 'Sequence Start', 'referenceCollections': ['RefSeqGene'], 'hgvs': 'NG_008385.2:g.9133C>T', 'rsId': 'rs1799853', 'impact': 'R144C', 'variantFrequency': [{'source': '1000Genomes', 'frequency': 0.047923}, {'source': 'GnomAD', 'frequency': 0.092016}], 'url': 'https://www.pharmvar.org/variant/13589', 'variantId': '8', 'position': 'NG_008385.2:g.9133C>T'}], 'alleleType': 'Core', 'url': 'https://www.pharmvar.org/haplotype/PV00538', 'hgvs': 'NG_008385.2:g.9133C>T', 'variantGroups': []}]
# Example lookup for an allele name PharmVar cannot resolve (returns an error payload)
get_pharmvar_result('NAT2*6')
{'errorMessage': 'Allele NAT2*6 could not be located in the PharmVar database.', 'errorCode': 404}
Thinking both about PharmGKB data and the more general question of other data sources. Options: