#!/usr/bin/env python
# coding: utf-8

# ### Running in Docker container on Ostrich
#
# #### Started Docker container with the following command:
#
# ```docker run -p 8888:8888 -v /Users/sam/data/:/data -v /Users/sam/owl_home/:/owl_home -v /Users/sam/owl_web/:/owl_web -v /Users/sam/gitrepos:/gitrepos -it f99537d7e06a```
#
# The command allows access to Jupyter Notebook over port 8888 and makes my Jupyter Notebook GitHub repo and my data files on Owl/home and Owl/web accessible to the Docker container.
#
# Once the container was started, I started Jupyter Notebook with the following command inside the Docker container:
#
# ```jupyter notebook```
#
# This is configured in the Docker container to launch a Jupyter Notebook without a browser on port 8888.
#
# The Docker container is running on an image created from this [Dockerfile (Git commit 443bc42)](https://github.com/sr320/LabDocs/blob/443bc425cd36d23a07cf12625f38b7e3a397b9be/code/dockerfiles/Dockerfile.bio)

# NOTE: this script is an nbconvert export of a notebook. Every cell calls
# get_ipython(), so it must be run inside an IPython session (e.g. `ipython
# script.py` or `%run`), not with plain `python`.

# In[1]:

get_ipython().run_cell_magic('bash', '', 'date\n')

# ### Check computer specs

# In[2]:

get_ipython().run_cell_magic('bash', '', 'hostname\n')

# In[3]:

get_ipython().run_cell_magic('bash', '', 'lscpu\n')

# In[4]:

# Change into the iTools directory; later cells invoke the binary as ./iTools.
# (Originally the bare automagic `cd ...`, which is not valid Python.)
get_ipython().run_line_magic('cd', '/gitrepos/Reseqtools/iTools_Code/')

# ### Double check files before beginning

# Check for raw FASTQ files

# In[5]:

# Originally the bare automagic `ls ...`; spelled out so the file parses.
get_ipython().run_line_magic('ls', '/data/oly_gbs_raw/')

# Verify index file looks OK.

# In[6]:

get_ipython().run_cell_magic('bash', '', 'head /data/oly_gbs_raw/index.lst\n')

# Whoops! That's not right! I forgot I moved the corrected files to my paper repo...

# In[7]:

get_ipython().run_line_magic('ls', '/gitrepos/paper_oly_gbs/data/')

# In[8]:

get_ipython().run_cell_magic('bash', '', 'head /gitrepos/paper_oly_gbs/data/index.lst\nhead /gitrepos/paper_oly_gbs/data/enzyme.txt\n')

# Looks good!
# ### Run BGI demultiplexing script

# NOTE(review): the -Flag argument in every command in this section reads
# '/gitrepos/paper_oly_gbs/dataenzyme.txt' -- it is missing the '/' between
# 'data' and 'enzyme.txt' (the file inspected before this section is
# '/gitrepos/paper_oly_gbs/data/enzyme.txt'). The commands are kept exactly as
# they were run because this notebook is a record of what happened; the
# corrected commands appear later in the notebook.

# In[9]:

get_ipython().run_cell_magic('bash', '', 'time ./iTools Fqtools splitpool \\\n-InFq1 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_1.fq.gz \\\n-InFq2 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_2.fq.gz \\\n-Index /gitrepos/paper_oly_gbs/data/index.lst \\\n-Flag /gitrepos/paper_oly_gbs/dataenzyme.txt \\\n-MisMatch \\\n-OutDir /data/oly_gbs_raw/split/\n')

# Ugh. Ran it to the same directory as I did the last time I ran the script! Need to change to a different directory...
#
# This also means that some of the files from the first run are probably screwed up!
#
# However, the goal of this is to compare a subset of the output and see if the two runs produced different results. Don't need all the files in order to do that comparison.

# In[10]:

get_ipython().run_cell_magic('bash', '', 'time ./iTools Fqtools splitpool \\\n-InFq1 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_1.fq.gz \\\n-InFq2 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_2.fq.gz \\\n-Index /gitrepos/paper_oly_gbs/data/index.lst \\\n-Flag /gitrepos/paper_oly_gbs/dataenzyme.txt \\\n-MisMatch \\\n-OutDir /data/oly_gbs_raw/split2/\n')

# In[ ]:

get_ipython().run_cell_magic('bash', '', 'ls -lh /data/oly_gbs_raw/split/ | head\n')

# Nope. I screwed it up. The demultiplexing I ran in Step 9 overwrote everything from the first run!
#
# Well, let's run it again so we have files to compare...

# In[ ]:

get_ipython().run_cell_magic('bash', '', 'time ./iTools Fqtools splitpool \\\n-InFq1 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_1.fq.gz \\\n-InFq2 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_2.fq.gz \\\n-Index /gitrepos/paper_oly_gbs/data/index.lst \\\n-Flag /gitrepos/paper_oly_gbs/dataenzyme.txt \\\n-MisMatch \\\n-OutDir /data/oly_gbs_raw/split/\n')

# Arrived at the lab this morning to disconnect errors. Let's see if these worked...
# NOTE: the cell counter restarts at In[1] here, i.e. the cells below were
# executed in a fresh kernel session.

# In[1]:

get_ipython().run_cell_magic('bash', '', 'ls -lh /data/oly_gbs_raw/split/ | head\n')

# In[2]:

get_ipython().run_cell_magic('bash', '', 'date\n')

# In[3]:

get_ipython().run_cell_magic('bash', '', 'ls -lh /data/oly_gbs_raw/split2/\n')

# OMG! There's a typo in both of the demultiplexing scripts above! The path to the enzyme list is incorrect... :( Let's run them again...
#
# It certainly would be nice if some checks were built into this program (and it'd be nice if I were paying more attention)!

# The commands below use the corrected -Flag path
# (/gitrepos/paper_oly_gbs/data/enzyme.txt).

# In[4]:

get_ipython().run_cell_magic('bash', '', 'time ./iTools Fqtools splitpool \\\n-InFq1 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_1.fq.gz \\\n-InFq2 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_2.fq.gz \\\n-Index /gitrepos/paper_oly_gbs/data/index.lst \\\n-Flag /gitrepos/paper_oly_gbs/data/enzyme.txt \\\n-MisMatch \\\n-OutDir /data/oly_gbs_raw/split2/\n')

# In[5]:

get_ipython().run_cell_magic('bash', '', 'time ./iTools Fqtools splitpool \\\n-InFq1 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_1.fq.gz \\\n-InFq2 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_2.fq.gz \\\n-Index /gitrepos/paper_oly_gbs/data/index.lst \\\n-Flag /gitrepos/paper_oly_gbs/data/enzyme.txt \\\n-MisMatch \\\n-OutDir /data/oly_gbs_raw/split/\n')

# In[6]:

# The commands invoke the binary as ./iTools, so the working directory must be
# the iTools directory. (Originally the bare automagic `cd ...`, which is not
# valid Python.)
get_ipython().run_line_magic('cd', '/gitrepos/Reseqtools/iTools_Code/')

# In[7]:

get_ipython().run_cell_magic('bash', '', 'time ./iTools Fqtools splitpool \\\n-InFq1 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_1.fq.gz \\\n-InFq2 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_2.fq.gz \\\n-Index /gitrepos/paper_oly_gbs/data/index.lst \\\n-Flag /gitrepos/paper_oly_gbs/data/enzyme.txt \\\n-MisMatch \\\n-OutDir /data/oly_gbs_raw/split/\n')

# In[8]:

get_ipython().run_cell_magic('bash', '', 'time ./iTools Fqtools splitpool \\\n-InFq1 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_1.fq.gz \\\n-InFq2 /data/oly_gbs_raw/160123_I132_FCH3YHMBBXX_L4_OYSzenG1AAD96FAAPEI-109_2.fq.gz \\\n-Index /gitrepos/paper_oly_gbs/data/index.lst \\\n-Flag /gitrepos/paper_oly_gbs/data/enzyme.txt \\\n-MisMatch \\\n-OutDir /data/oly_gbs_raw/split2/\n')

# In[ ]: