#!/usr/bin/env python # coding: utf-8 # In[16]: get_ipython().run_cell_magic('bash', '', '\n#Create an installation directory\nmkdir src\ncd src\n\n#Take a clone of BioPerl from GitHub and change to the correct version:\ngit clone https://github.com/bioperl/bioperl-live.git\ncd bioperl-live\ngit checkout bioperl-release-1-6-1\ncd -\n\n#Install the Ensembl git tools to allow easy management of the Ensembl repos.\ngit clone https://github.com/Ensembl/ensembl-git-tools.git\nexport PATH=$PWD/ensembl-git-tools/bin:$PATH\n\n#Install the APIs that you need. You can install all the APIs using the git ensembl command:\ngit ensembl --clone api\n') # In[19]: snps = ['rs1333049', 'rs2248462', 'rs225190', 'rs225212', 'rs2252865', 'rs2252996'] with open('SNP_list_sorted.txt', 'w') as write_file: write_file.write('\n'.join(sorted(snps))) # In[20]: get_ipython().run_cell_magic('perl', '', '\nuse lib "src/bioperl-live";\nuse lib "src/ensembl/modules";\nuse lib "src/ensembl-compara/modules";\nuse lib "src/ensembl-variation/modules";\nuse lib "src/ensembl-funcgen/modules";\n\nuse strict;\nuse warnings;\nuse Bio::EnsEMBL::Registry;\n\nopen (IN, "SNPs_in_LD.tsv");\n\nmy $reg = \'Bio::EnsEMBL::Registry\';\n$reg->load_registry_from_db(-host => \'ensembldb.ensembl.org\',-user => \'anonymous\');\nmy $va = $reg->get_adaptor( \'human\', \'variation\', \'variation\' );\n\nwhile () {\n print $_;\n my $v = $va->fetch_by_name($_);\n if (!$v) {\n print \'unfetched \' . $_;\n next;\n }\n my @vfs = @{ $v->get_all_VariationFeatures };\n \n foreach my $vf (@vfs){ \n my $ld = $vf->get_all_LD_values;\n my @pops = @{ $vf->get_all_LD_Populations };\n my @ldvs = @{ $ld->get_variations };\n \n foreach my $pop (@pops) {\n \n if ($pop->name =~ /1000GENOMES/) {\n \n foreach my $ldv (@ldvs) { \n if ($ldv->stable_id ne $_) {\n my @ldvfs = @{ $ldv->get_all_VariationFeatures };\n \n foreach my $ldvf (@ldvfs) {\n my @tvs = @{ $ldvf->get_all_TranscriptVariations };\n my $r2 = $ld->get_r_square($vf, $ldvf, $pop); \n \n foreach my $tv (@tvs) {\n my $gene = $tv->transcript->get_Gene;\n \n if ($r2) { \n print OUT $v->stable_id, "\\t", $ldv->stable_id, "\\t", $gene->external_name, "\\t", $r2, "\\t", $pop->name, "\\n";\n }\n }\n }\n }\n }\n }\n }\n }\n }\n') # In[ ]: