cd /Users/Sam
/Users/Sam
cd ../../Applications
/Applications
cd /Applications/ncbi-blast-2.2.29+/bin
/Applications/ncbi-blast-2.2.29+/bin
!./blastn -task blastn -query /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -db /Volumes/homes/srlab/blastdbs/PhageGBnuc20130529filter -outfmt "6 stitle std" -max_target_seqs 3 -num_threads 16 -out /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnPhageGBnt.txt
!head -10 /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnPhageGBnt.txt
gi|396576808|emb|FR775895.2| Enterobacteria phage phi92, complete genome Contig5DeNovoAssembly gi|396576808|emb|FR775895.2| 87.88 33 2 2 3 34 76644 76613 0.32 35.6 gi|15281680|gb|AF396866.1| Bacteriophage Mx8, complete genome Contig5DeNovoAssembly gi|15281680|gb|AF396866.1| 95.00 20 1 0 17 36 24781 24762 3.9 31.9 gi|15320570|ref|NC_003085.1| Myxococcus phage Mx8, complete genome Contig5DeNovoAssembly gi|15320570|ref|NC_003085.1| 95.00 20 1 0 17 36 24781 24762 3.9 31.9 gi|312262424|gb|GU396103.1| Aeromonas phage PX29, complete genome Contig6DeNovoAssembly gi|312262424|gb|GU396103.1| 100.00 19 0 0 42 60 200091 200109 0.39 35.6 gi|254211614|gb|GQ334450.1| Cyanophage PSS2, complete genome Contig6DeNovoAssembly gi|254211614|gb|GQ334450.1| 100.00 17 0 0 18 34 56942 56926 4.7 31.9 gi|254729462|ref|NC_013021.1| Cyanophage PSS2, complete genome Contig6DeNovoAssembly gi|254729462|ref|NC_013021.1| 100.00 17 0 0 18 34 56942 56926 4.7 31.9 gi|310005390|gb|GU075905.1| Prochlorococcus phage P-HM2, complete genome Contig9DeNovoAssembly gi|310005390|gb|GU075905.1| 85.71 28 4 0 11 38 180126 180099 1.3 33.7 gi|326782972|ref|NC_015284.1| Prochlorococcus phage P-HM2, complete genome Contig9DeNovoAssembly gi|326782972|ref|NC_015284.1| 85.71 28 4 0 11 38 180126 180099 1.3 33.7 gi|311788808|gb|HQ336222.2| Acanthamoeba polyphaga mimivirus, complete genome Contig10DeNovoAssembly gi|311788808|gb|HQ336222.2| 95.45 22 1 0 23 44 342629 342608 0.26 35.6 gi|311788808|gb|HQ336222.2| Acanthamoeba polyphaga mimivirus, complete genome Contig10DeNovoAssembly gi|311788808|gb|HQ336222.2| 80.00 40 5 1 9 48 993827 993791 0.26 35.6
./blastn -h
File "<ipython-input-27-1213ea4d2e0b>", line 1 ./blastn -h ^ SyntaxError: invalid syntax
#can't remember column order of output file
!./blastn -h
USAGE blastn [-h] [-help] [-import_search_strategy filename] [-export_search_strategy filename] [-task task_name] [-db database_name] [-dbsize num_letters] [-gilist filename] [-seqidlist filename] [-negative_gilist filename] [-entrez_query entrez_query] [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm] [-subject subject_input_file] [-subject_loc range] [-query input_file] [-out output_file] [-evalue evalue] [-word_size int_value] [-gapopen open_penalty] [-gapextend extend_penalty] [-perc_identity float_value] [-xdrop_ungap float_value] [-xdrop_gap float_value] [-xdrop_gap_final float_value] [-searchsp int_value] [-max_hsps int_value] [-sum_statistics] [-penalty penalty] [-reward reward] [-no_greedy] [-min_raw_gapped_score int_value] [-template_type type] [-template_length int_value] [-dust DUST_options] [-filtering_db filtering_database] [-window_masker_taxid window_masker_taxid] [-window_masker_db window_masker_db] [-soft_masking soft_masking] [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value] [-best_hit_score_edge float_value] [-window_size int_value] [-off_diagonal_range int_value] [-use_index boolean] [-index_name string] [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines] [-outfmt format] [-show_gis] [-num_descriptions int_value] [-num_alignments int_value] [-html] [-max_target_seqs num_sequences] [-num_threads int_value] [-remote] [-version] DESCRIPTION Nucleotide-Nucleotide BLAST 2.2.29+ Use '-help' to print detailed descriptions of command line arguments
#can't remember column order of output file
!./blastn -help
USAGE blastn [-h] [-help] [-import_search_strategy filename] [-export_search_strategy filename] [-task task_name] [-db database_name] [-dbsize num_letters] [-gilist filename] [-seqidlist filename] [-negative_gilist filename] [-entrez_query entrez_query] [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm] [-subject subject_input_file] [-subject_loc range] [-query input_file] [-out output_file] [-evalue evalue] [-word_size int_value] [-gapopen open_penalty] [-gapextend extend_penalty] [-perc_identity float_value] [-xdrop_ungap float_value] [-xdrop_gap float_value] [-xdrop_gap_final float_value] [-searchsp int_value] [-max_hsps int_value] [-sum_statistics] [-penalty penalty] [-reward reward] [-no_greedy] [-min_raw_gapped_score int_value] [-template_type type] [-template_length int_value] [-dust DUST_options] [-filtering_db filtering_database] [-window_masker_taxid window_masker_taxid] [-window_masker_db window_masker_db] [-soft_masking soft_masking] [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value] [-best_hit_score_edge float_value] [-window_size int_value] [-off_diagonal_range int_value] [-use_index boolean] [-index_name string] [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines] [-outfmt format] [-show_gis] [-num_descriptions int_value] [-num_alignments int_value] [-html] [-max_target_seqs num_sequences] [-num_threads int_value] [-remote] [-version] DESCRIPTION Nucleotide-Nucleotide BLAST 2.2.29+ OPTIONAL ARGUMENTS -h Print USAGE and DESCRIPTION; ignore all other parameters -help Print USAGE, DESCRIPTION and ARGUMENTS; ignore all other parameters -version Print version number; ignore other arguments *** Input query options -query <File_In> Input file name Default = `-' -query_loc <String> Location on the query sequence in 1-based offsets (Format: start-stop) -strand <String, `both', `minus', `plus'> Query strand(s) to search against database/subject Default = `both' *** General search options -task 
<String, Permissible values: 'blastn' 'blastn-short' 'dc-megablast' 'megablast' 'rmblastn' > Task to execute Default = `megablast' -db <String> BLAST database name * Incompatible with: subject, subject_loc -out <File_Out> Output file name Default = `-' -evalue <Real> Expectation value (E) threshold for saving hits Default = `10' -word_size <Integer, >=4> Word size for wordfinder algorithm (length of best perfect match) -gapopen <Integer> Cost to open a gap -gapextend <Integer> Cost to extend a gap -penalty <Integer, <=0> Penalty for a nucleotide mismatch -reward <Integer, >=0> Reward for a nucleotide match -use_index <Boolean> Use MegaBLAST database index Default = `false' -index_name <String> MegaBLAST database index name *** BLAST-2-Sequences options -subject <File_In> Subject sequence(s) to search * Incompatible with: db, gilist, seqidlist, negative_gilist, db_soft_mask, db_hard_mask -subject_loc <String> Location on the subject sequence in 1-based offsets (Format: start-stop) * Incompatible with: db, gilist, seqidlist, negative_gilist, db_soft_mask, db_hard_mask, remote *** Formatting options -outfmt <String> alignment view options: 0 = pairwise, 1 = query-anchored showing identities, 2 = query-anchored no identities, 3 = flat query-anchored, show identities, 4 = flat query-anchored, no identities, 5 = XML Blast output, 6 = tabular, 7 = tabular with comment lines, 8 = Text ASN.1, 9 = Binary ASN.1, 10 = Comma-separated values, 11 = BLAST archive format (ASN.1) Options 6, 7, and 10 can be additionally configured to produce a custom format specified by space delimited format specifiers. 
The supported format specifiers are: qseqid means Query Seq-id qgi means Query GI qacc means Query accesion qaccver means Query accesion.version qlen means Query sequence length sseqid means Subject Seq-id sallseqid means All subject Seq-id(s), separated by a ';' sgi means Subject GI sallgi means All subject GIs sacc means Subject accession saccver means Subject accession.version sallacc means All subject accessions slen means Subject sequence length qstart means Start of alignment in query qend means End of alignment in query sstart means Start of alignment in subject send means End of alignment in subject qseq means Aligned part of query sequence sseq means Aligned part of subject sequence evalue means Expect value bitscore means Bit score score means Raw score length means Alignment length pident means Percentage of identical matches nident means Number of identical matches mismatch means Number of mismatches positive means Number of positive-scoring matches gapopen means Number of gap openings gaps means Total number of gaps ppos means Percentage of positive-scoring matches frames means Query and subject frames separated by a '/' qframe means Query frame sframe means Subject frame btop means Blast traceback operations (BTOP) staxids means unique Subject Taxonomy ID(s), separated by a ';' (in numerical order) sscinames means unique Subject Scientific Name(s), separated by a ';' scomnames means unique Subject Common Name(s), separated by a ';' sblastnames means unique Subject Blast Name(s), separated by a ';' (in alphabetical order) sskingdoms means unique Subject Super Kingdom(s), separated by a ';' (in alphabetical order) stitle means Subject Title salltitles means All Subject Title(s), separated by a '<>' sstrand means Subject Strand qcovs means Query Coverage Per Subject qcovhsp means Query Coverage Per HSP When not provided, the default value is: 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore', which is equivalent to 
the keyword 'std' Default = `0' -show_gis Show NCBI GIs in deflines? -num_descriptions <Integer, >=0> Number of database sequences to show one-line descriptions for Not applicable for outfmt > 4 Default = `500' * Incompatible with: max_target_seqs -num_alignments <Integer, >=0> Number of database sequences to show alignments for Default = `250' * Incompatible with: max_target_seqs -html Produce HTML output? *** Query filtering options -dust <String> Filter query sequence with DUST (Format: 'yes', 'level window linker', or 'no' to disable) Default = `20 64 1' -filtering_db <String> BLAST database containing filtering elements (i.e.: repeats) -window_masker_taxid <Integer> Enable WindowMasker filtering using a Taxonomic ID -window_masker_db <String> Enable WindowMasker filtering using this repeats database. -soft_masking <Boolean> Apply filtering locations as soft masks Default = `true' -lcase_masking Use lower case filtering in query and subject sequence(s)? *** Restrict search or results -gilist <String> Restrict search of database to list of GI's * Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc -seqidlist <String> Restrict search of database to list of SeqId's * Incompatible with: gilist, negative_gilist, remote, subject, subject_loc -negative_gilist <String> Restrict search of database to everything except the listed GIs * Incompatible with: gilist, seqidlist, remote, subject, subject_loc -entrez_query <String> Restrict search with the given Entrez query * Requires: remote -db_soft_mask <String> Filtering algorithm ID to apply to the BLAST database as soft masking * Incompatible with: db_hard_mask, subject, subject_loc -db_hard_mask <String> Filtering algorithm ID to apply to the BLAST database as hard masking * Incompatible with: db_soft_mask, subject, subject_loc -perc_identity <Real, 0..100> Percent identity -culling_limit <Integer, >=0> If the query range of a hit is enveloped by that of at least this many higher-scoring hits, 
delete the hit * Incompatible with: best_hit_overhang, best_hit_score_edge -best_hit_overhang <Real, (>=0 and =<0.5)> Best Hit algorithm overhang value (recommended value: 0.1) * Incompatible with: culling_limit -best_hit_score_edge <Real, (>=0 and =<0.5)> Best Hit algorithm score edge value (recommended value: 0.1) * Incompatible with: culling_limit -max_target_seqs <Integer, >=1> Maximum number of aligned sequences to keep Not applicable for outfmt <= 4 Default = `500' * Incompatible with: num_descriptions, num_alignments *** Discontiguous MegaBLAST options -template_type <String, `coding', `coding_and_optimal', `optimal'> Discontiguous MegaBLAST template type * Requires: template_length -template_length <Integer, Permissible values: '16' '18' '21' > Discontiguous MegaBLAST template length * Requires: template_type *** Statistical options -dbsize <Int8> Effective length of the database -searchsp <Int8, >=0> Effective length of the search space -max_hsps <Integer, >=0> Set maximum number of HSPs per subject sequence to save (0 means no limit) Default = `0' -sum_statistics Use sum statistics *** Search strategy options -import_search_strategy <File_In> Search strategy to use * Incompatible with: export_search_strategy -export_search_strategy <File_Out> File name to record the search strategy used * Incompatible with: import_search_strategy *** Extension options -xdrop_ungap <Real> X-dropoff value (in bits) for ungapped extensions -xdrop_gap <Real> X-dropoff value (in bits) for preliminary gapped extensions -xdrop_gap_final <Real> X-dropoff value (in bits) for final gapped alignment -no_greedy Use non-greedy dynamic programming extension -min_raw_gapped_score <Integer> Minimum raw gapped score to keep an alignment in the preliminary gapped and traceback stages -ungapped Perform ungapped alignment only? 
-window_size <Integer, >=0> Multiple hits window size, use 0 to specify 1-hit algorithm -off_diagonal_range <Integer, >=0> Number of off-diagonals to search for the 2nd hit, use 0 to turn off Default = `0' *** Miscellaneous options -parse_deflines Should the query and subject defline(s) be parsed? -num_threads <Integer, >=1> Number of threads (CPUs) to use in the BLAST search Default = `1' * Incompatible with: remote -remote Execute search remotely? * Incompatible with: gilist, seqidlist, negative_gilist, subject_loc, num_threads
#removed "pipe" delimiters, added column headings in LibreOffice
#renamed file: /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnPhageGBntTAB.csv
#Now using Ubuntu. Had to copy this notebook to the local IPython directory, as IPython would not open it from
#its location on Eagle. :(
#Same with the data file.
#use sed to remove spaces in contig names for subsequent joining using SQLShare
!sed 's/ //g' /home/samb/Desktop/AbWithPhageToAbMasterRefNoPhageUnmappedReadsMapping78369references.csv > /home/samb/Desktop/AbWithPhageToAbMasterRefNoPhageUnmappedReadsMapping78369referencesNoSpaces.csv
!head -10 /home/samb/Desktop/AbWithPhageToAbMasterRefNoPhageUnmappedReadsMapping78369referencesNoSpaces.csv
#Installed BLAST2.2.29+ on Linux
#Appending install location to the PATH
!cd ../..
pwd
u'/home/samb'
cd ..
/home
cd /
/
ls
bin/ etc/ lib/ mnt/ run/ tmp/ vmlinuz.old@ boot/ home/ lib64/ opt/ sbin/ usr/ cdrom/ initrd.img@ lost+found/ proc/ srv/ var/ dev/ initrd.img.old@ media/ root/ sys/ vmlinuz@
cd etc/profile
[Errno 20] Not a directory: 'etc/profile' /
cd /etc/profile
[Errno 20] Not a directory: '/etc/profile' /
cd etc
/etc
ls
acpi/ host.conf printcap@ adduser.conf hostname profile alternatives/ hostname~ profile.d/ anacrontab hostname.old protocols apg.conf hosts pulse/ apm/ hosts~ python/ apparmor/ hosts.allow python2.7/ apparmor.d/ hosts.deny python3/ apport/ hosts.old python3.3/ apt/ hp/ rc0.d/ aptdaemon/ ifplugd/ rc1.d/ at-spi2/ ImageMagick/ rc2.d/ avahi/ init/ rc3.d/ bash.bashrc init.d/ rc4.d/ bash_completion initramfs-tools/ rc5.d/ bash_completion.d/ inputrc rc6.d/ bindresvport.blacklist insserv/ rc.local* blkid.conf insserv.conf rcS.d/ blkid.tab@ insserv.conf.d/ remote-login-service.conf bluetooth/ iproute2/ resolvconf/ bonobo-activation/ issue resolv.conf@ brlapi.key issue.net rmt* brltty/ kbd/ rpc brltty.conf kernel/ rsyslog.conf ca-certificates/ kernel-img.conf rsyslog.d/ ca-certificates.conf kerneloops.conf samba/ calendar/ ldap/ sane.d/ chatscripts/ ld.so.cache securetty checkbox.d/ ld.so.conf security/ chromium-browser/ ld.so.conf.d/ selinux/ colord.conf legal sensors3.conf compizconfig/ libaudit.conf sensors.d/ console-setup/ libnl-3/ services cracklib/ libpaper.d/ sgml/ cron.d/ libreoffice/ shadow cron.daily/ lightdm/ shadow- cron.hourly/ lintianrc shells cron.monthly/ locale.alias signond.conf crontab localtime signon-ui/ cron.weekly/ logcheck/ skel/ cups/ login.defs sound/ cupshelpers/ logrotate.conf speech-dispatcher/ dbus-1/ logrotate.d/ ssh/ dconf/ lsb-release ssl/ debconf.conf ltrace.conf subgid debian_version magic subgid- default/ magic.mime subuid deluser.conf mailcap subuid- depmod.d/ mailcap.order sudoers dhcp/ manpath.config sudoers.d/ dhcp3/ mime.types sysctl.conf dictionaries-common/ mke2fs.conf sysctl.d/ dnsmasq.d/ modprobe.d/ systemd/ doc-base/ modules terminfo/ dpkg/ mtab thunderbird/ drirc mtab.fuselock timezone emacs/ mtools.conf timidity/ environment mysql/ ts.conf firefox/ nanorc ucf.conf fonts/ netscsid.conf udev/ foomatic/ network/ udisks2/ fstab NetworkManager/ ufw/ fstab.d/ networks updatedb.conf fuse.conf newt/ update-manager/ gai.conf 
nsswitch.conf update-motd.d/ gconf/ obex-data-server/ update-notifier/ gdb/ openal/ UPower/ ghostscript/ opt/ upstart-xsessions gnome/ os-release usb_modeswitch.conf gnome-app-install/ pam.conf usb_modeswitch.d/ gnome-settings-daemon/ pam.d/ vim/ gnome-vfs-2.0/ papersize vtrgb@ groff/ passwd wgetrc group passwd- wildmidi/ group- pcmcia/ wodim.conf grub.d/ perl/ wpa_supplicant/ gshadow pm/ X11/ gshadow- pnm2ppa.conf xdg/ gtk-2.0/ polkit-1/ xml/ gtk-3.0/ popularity-contest.conf xul-ext/ hdparm.conf ppp/ zsh_command_not_found
cd profile.d
/etc/profile.d
ls
bash_completion.sh vte.sh
!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh
/bin/sh: 1: cannot create myenvvars.sh: Permission denied
!sudo export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh
/bin/sh: 1: cannot create myenvvars.sh: Permission denied
!gedit bash_completion.sh
** (gedit:4854): WARNING **: Could not load Gedit repository: Typelib file for namespace 'GtkSource', version '3.0' not found
!gedit vte.sh
** (gedit:4874): WARNING **: Could not load Gedit repository: Typelib file for namespace 'GtkSource', version '3.0' not found
pwd
u'/etc/profile.d'
!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh
/bin/sh: 1: cannot create myenvvars.sh: Permission denied
ls -ld
drwxr-xr-x 2 root root 4096 Oct 16 12:02 ./
ls -l vte.sh
-rw-r--r-- 1 root root 1945 Jun 20 2013 vte.sh
#need to change permissions on this directory in order to write to it
#can't change permissions via IPython; will do this in terminal
#sudo chmod 757 (applied to this directory, /etc/profile.d)
#was 755, with root as owner and group
!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh
!echo $PATH
/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games
!echo $PATH
/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games
cd /
/
cd /etc/profile.d
/etc/profile.d
ls
bash_completion.sh vte.sh
!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh
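#this didn't write any text to the file: export prints nothing to stdout, so the redirect only created an empty file
#I also think I need the "#!/bin/bash" at the beginning of the script
#Added both (the export line and the "#!/bin/bash" line) to the script manually using gedit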
ls
bash_completion.sh myenvvars.sh vte.sh
!echo $PATH
/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin
pwd
u'/home/samb'
#let's see if this worked
!./blastn -h
/bin/sh: 1: ./blastn: not found
blastn -help
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-6-f74ffb396a19> in <module>() ----> 1 blastn -help NameError: name 'blastn' is not defined
!blastn -help
USAGE blastn [-h] [-help] [-import_search_strategy filename] [-export_search_strategy filename] [-task task_name] [-db database_name] [-dbsize num_letters] [-gilist filename] [-seqidlist filename] [-negative_gilist filename] [-entrez_query entrez_query] [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm] [-subject subject_input_file] [-subject_loc range] [-query input_file] [-out output_file] [-evalue evalue] [-word_size int_value] [-gapopen open_penalty] [-gapextend extend_penalty] [-perc_identity float_value] [-xdrop_ungap float_value] [-xdrop_gap float_value] [-xdrop_gap_final float_value] [-searchsp int_value] [-max_hsps int_value] [-sum_statistics] [-penalty penalty] [-reward reward] [-no_greedy] [-min_raw_gapped_score int_value] [-template_type type] [-template_length int_value] [-dust DUST_options] [-filtering_db filtering_database] [-window_masker_taxid window_masker_taxid] [-window_masker_db window_masker_db] [-soft_masking soft_masking] [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value] [-best_hit_score_edge float_value] [-window_size int_value] [-off_diagonal_range int_value] [-use_index boolean] [-index_name string] [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines] [-outfmt format] [-show_gis] [-num_descriptions int_value] [-num_alignments int_value] [-html] [-max_target_seqs num_sequences] [-num_threads int_value] [-remote] [-version] DESCRIPTION Nucleotide-Nucleotide BLAST 2.2.29+ OPTIONAL ARGUMENTS -h Print USAGE and DESCRIPTION; ignore all other parameters -help Print USAGE, DESCRIPTION and ARGUMENTS; ignore all other parameters -version Print version number; ignore other arguments *** Input query options -query <File_In> Input file name Default = `-' -query_loc <String> Location on the query sequence in 1-based offsets (Format: start-stop) -strand <String, `both', `minus', `plus'> Query strand(s) to search against database/subject Default = `both' *** General search options -task 
<String, Permissible values: 'blastn' 'blastn-short' 'dc-megablast' 'megablast' 'rmblastn' > Task to execute Default = `megablast' -db <String> BLAST database name * Incompatible with: subject, subject_loc -out <File_Out> Output file name Default = `-' -evalue <Real> Expectation value (E) threshold for saving hits Default = `10' -word_size <Integer, >=4> Word size for wordfinder algorithm (length of best perfect match) -gapopen <Integer> Cost to open a gap -gapextend <Integer> Cost to extend a gap -penalty <Integer, <=0> Penalty for a nucleotide mismatch -reward <Integer, >=0> Reward for a nucleotide match -use_index <Boolean> Use MegaBLAST database index Default = `false' -index_name <String> MegaBLAST database index name *** BLAST-2-Sequences options -subject <File_In> Subject sequence(s) to search * Incompatible with: db, gilist, seqidlist, negative_gilist, db_soft_mask, db_hard_mask -subject_loc <String> Location on the subject sequence in 1-based offsets (Format: start-stop) * Incompatible with: db, gilist, seqidlist, negative_gilist, db_soft_mask, db_hard_mask, remote *** Formatting options -outfmt <String> alignment view options: 0 = pairwise, 1 = query-anchored showing identities, 2 = query-anchored no identities, 3 = flat query-anchored, show identities, 4 = flat query-anchored, no identities, 5 = XML Blast output, 6 = tabular, 7 = tabular with comment lines, 8 = Text ASN.1, 9 = Binary ASN.1, 10 = Comma-separated values, 11 = BLAST archive format (ASN.1) Options 6, 7, and 10 can be additionally configured to produce a custom format specified by space delimited format specifiers. 
The supported format specifiers are: qseqid means Query Seq-id qgi means Query GI qacc means Query accesion qaccver means Query accesion.version qlen means Query sequence length sseqid means Subject Seq-id sallseqid means All subject Seq-id(s), separated by a ';' sgi means Subject GI sallgi means All subject GIs sacc means Subject accession saccver means Subject accession.version sallacc means All subject accessions slen means Subject sequence length qstart means Start of alignment in query qend means End of alignment in query sstart means Start of alignment in subject send means End of alignment in subject qseq means Aligned part of query sequence sseq means Aligned part of subject sequence evalue means Expect value bitscore means Bit score score means Raw score length means Alignment length pident means Percentage of identical matches nident means Number of identical matches mismatch means Number of mismatches positive means Number of positive-scoring matches gapopen means Number of gap openings gaps means Total number of gaps ppos means Percentage of positive-scoring matches frames means Query and subject frames separated by a '/' qframe means Query frame sframe means Subject frame btop means Blast traceback operations (BTOP) staxids means unique Subject Taxonomy ID(s), separated by a ';' (in numerical order) sscinames means unique Subject Scientific Name(s), separated by a ';' scomnames means unique Subject Common Name(s), separated by a ';' sblastnames means unique Subject Blast Name(s), separated by a ';' (in alphabetical order) sskingdoms means unique Subject Super Kingdom(s), separated by a ';' (in alphabetical order) stitle means Subject Title salltitles means All Subject Title(s), separated by a '<>' sstrand means Subject Strand qcovs means Query Coverage Per Subject qcovhsp means Query Coverage Per HSP When not provided, the default value is: 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore', which is equivalent to 
the keyword 'std' Default = `0' -show_gis Show NCBI GIs in deflines? -num_descriptions <Integer, >=0> Number of database sequences to show one-line descriptions for Not applicable for outfmt > 4 Default = `500' * Incompatible with: max_target_seqs -num_alignments <Integer, >=0> Number of database sequences to show alignments for Default = `250' * Incompatible with: max_target_seqs -html Produce HTML output? *** Query filtering options -dust <String> Filter query sequence with DUST (Format: 'yes', 'level window linker', or 'no' to disable) Default = `20 64 1' -filtering_db <String> BLAST database containing filtering elements (i.e.: repeats) -window_masker_taxid <Integer> Enable WindowMasker filtering using a Taxonomic ID -window_masker_db <String> Enable WindowMasker filtering using this repeats database. -soft_masking <Boolean> Apply filtering locations as soft masks Default = `true' -lcase_masking Use lower case filtering in query and subject sequence(s)? *** Restrict search or results -gilist <String> Restrict search of database to list of GI's * Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc -seqidlist <String> Restrict search of database to list of SeqId's * Incompatible with: gilist, negative_gilist, remote, subject, subject_loc -negative_gilist <String> Restrict search of database to everything except the listed GIs * Incompatible with: gilist, seqidlist, remote, subject, subject_loc -entrez_query <String> Restrict search with the given Entrez query * Requires: remote -db_soft_mask <String> Filtering algorithm ID to apply to the BLAST database as soft masking * Incompatible with: db_hard_mask, subject, subject_loc -db_hard_mask <String> Filtering algorithm ID to apply to the BLAST database as hard masking * Incompatible with: db_soft_mask, subject, subject_loc -perc_identity <Real, 0..100> Percent identity -culling_limit <Integer, >=0> If the query range of a hit is enveloped by that of at least this many higher-scoring hits, 
delete the hit * Incompatible with: best_hit_overhang, best_hit_score_edge -best_hit_overhang <Real, (>=0 and =<0.5)> Best Hit algorithm overhang value (recommended value: 0.1) * Incompatible with: culling_limit -best_hit_score_edge <Real, (>=0 and =<0.5)> Best Hit algorithm score edge value (recommended value: 0.1) * Incompatible with: culling_limit -max_target_seqs <Integer, >=1> Maximum number of aligned sequences to keep Not applicable for outfmt <= 4 Default = `500' * Incompatible with: num_descriptions, num_alignments *** Discontiguous MegaBLAST options -template_type <String, `coding', `coding_and_optimal', `optimal'> Discontiguous MegaBLAST template type * Requires: template_length -template_length <Integer, Permissible values: '16' '18' '21' > Discontiguous MegaBLAST template length * Requires: template_type *** Statistical options -dbsize <Int8> Effective length of the database -searchsp <Int8, >=0> Effective length of the search space -max_hsps <Integer, >=0> Set maximum number of HSPs per subject sequence to save (0 means no limit) Default = `0' -sum_statistics Use sum statistics *** Search strategy options -import_search_strategy <File_In> Search strategy to use * Incompatible with: export_search_strategy -export_search_strategy <File_Out> File name to record the search strategy used * Incompatible with: import_search_strategy *** Extension options -xdrop_ungap <Real> X-dropoff value (in bits) for ungapped extensions -xdrop_gap <Real> X-dropoff value (in bits) for preliminary gapped extensions -xdrop_gap_final <Real> X-dropoff value (in bits) for final gapped alignment -no_greedy Use non-greedy dynamic programming extension -min_raw_gapped_score <Integer> Minimum raw gapped score to keep an alignment in the preliminary gapped and traceback stages -ungapped Perform ungapped alignment only? 
-window_size <Integer, >=0> Multiple hits window size, use 0 to specify 1-hit algorithm -off_diagonal_range <Integer, >=0> Number of off-diagonals to search for the 2nd hit, use 0 to turn off Default = `0' *** Miscellaneous options -parse_deflines Should the query and subject defline(s) be parsed? -num_threads <Integer, >=1> Number of threads (CPUs) to use in the BLAST search Default = `1' * Incompatible with: remote -remote Execute search remotely? * Incompatible with: gilist, seqidlist, negative_gilist, subject_loc, num_threads
#let's try creating a config file so that BLAST looks in the default BLAST databases (dbs)
#directory on this computer (/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs)
cd home
File "<ipython-input-8-42ba6ba23109>", line 3 cd home ^ SyntaxError: invalid syntax
cd /home
/home
!makefile blastdbs.ncbirc
/bin/sh: 1: makefile: not found
!echo "BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs" > blastdbs.ncbirc
/bin/sh: 1: cannot create blastdbs.ncbirc: Permission denied
!echo "BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs" > blastdbs.ncbirc
!makeblastdb -h
USAGE makeblastdb [-h] [-help] [-in input_file] [-input_type type] -dbtype molecule_type [-title database_title] [-parse_seqids] [-hash_index] [-mask_data mask_data_files] [-mask_id mask_algo_ids] [-mask_desc mask_algo_descriptions] [-gi_mask] [-gi_mask_name gi_based_mask_names] [-out database_name] [-max_file_sz number_of_bytes] [-taxid TaxID] [-taxid_map TaxIDMapFile] [-logfile File_Name] [-version] DESCRIPTION Application to create BLAST databases, version 2.2.29+ Use '-help' to print detailed descriptions of command line arguments
!makeblastdb -in 20140225_RickettsiaGBnt.fasta -dbtype nucl
Building a new DB, current time: 02/25/2014 12:39:55 New DB name: 20140225_RickettsiaGBnt.fasta New DB title: 20140225_RickettsiaGBnt.fasta Sequence type: Nucleotide Keep Linkouts: T Keep MBits: T Maximum file size: 1000000000B BLAST options error: File 20140225_RickettsiaGBnt.fasta does not exist
!makeblastdb -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/20140225_RickettsiaGBnt.fasta -dbtype nucl -out 20140225_RickettsiaGBnt
Building a new DB, current time: 02/25/2014 12:41:16 New DB name: 20140225_RickettsiaGBnt New DB title: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/20140225_RickettsiaGBnt.fasta Sequence type: Nucleotide Keep Linkouts: T Keep MBits: T Maximum file size: 1000000000B Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|167' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|168' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|296' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|297' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|460' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|479' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|480' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|481' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|482' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|483' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|519' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|520' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|521' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring 
sequence 'lcl|522' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|541' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1762' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1763' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1764' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1792' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1795' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1805' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1997' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1998' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|2068' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|2354' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5394' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5471' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5565' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5578' as it has no sequence data Adding sequences from FASTA; added 8786 sequences 
in 9.54842 seconds.
#check number of entries (i.e. count the '>' characters that begin the description line
#of each entry) in the source fasta file
!awk '/>/ { count++ } END { print count }' /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/20140225_RickettsiaGBnt.fasta
8815
!blastn -h
USAGE blastn [-h] [-help] [-import_search_strategy filename] [-export_search_strategy filename] [-task task_name] [-db database_name] [-dbsize num_letters] [-gilist filename] [-seqidlist filename] [-negative_gilist filename] [-entrez_query entrez_query] [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm] [-subject subject_input_file] [-subject_loc range] [-query input_file] [-out output_file] [-evalue evalue] [-word_size int_value] [-gapopen open_penalty] [-gapextend extend_penalty] [-perc_identity float_value] [-xdrop_ungap float_value] [-xdrop_gap float_value] [-xdrop_gap_final float_value] [-searchsp int_value] [-max_hsps int_value] [-sum_statistics] [-penalty penalty] [-reward reward] [-no_greedy] [-min_raw_gapped_score int_value] [-template_type type] [-template_length int_value] [-dust DUST_options] [-filtering_db filtering_database] [-window_masker_taxid window_masker_taxid] [-window_masker_db window_masker_db] [-soft_masking soft_masking] [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value] [-best_hit_score_edge float_value] [-window_size int_value] [-off_diagonal_range int_value] [-use_index boolean] [-index_name string] [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines] [-outfmt format] [-show_gis] [-num_descriptions int_value] [-num_alignments int_value] [-html] [-max_target_seqs num_sequences] [-num_threads int_value] [-remote] [-version] DESCRIPTION Nucleotide-Nucleotide BLAST 2.2.29+ Use '-help' to print detailed descriptions of command line arguments
!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8
BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]
#try this again, but moved the blastdbs.ncbirc file to /home/samb, as suggested by the error message in In[9]
!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8
BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]
#added [BLAST] as first line in the blastdbs.ncbirc file
!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8
BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]
#I think I might actually have to specify the specific database in the .ncbirc file, so I added the full path to the Rickettsia db in that file
!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8
BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]
#restarted Terminal and IPython
#also restored the database path to just the "dbs" directory, not to an actual db file
!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8
BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]
pwd
u'/home/samb'
ls
BioinformaticsTools/ Documents/ PhageNGS_ID.ipynb Templates/ blastdbs.ncbirc Downloads/ Pictures/ Ubuntu One/ blastdbs.ncbirc~ examples.desktop Public/ Videos/ Desktop/ Music/ sed_and_awk_practice.ipynb
!head -10 blastdbs.ncbirc
[BLAST] BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs
#copied that .ncbirc file to the root directory (/) of the computer
!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8
BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]
!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8
!head -10 /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt
Contig366DeNovoAssembly gi|311103224|ref|NC_014640.1| 79.95 369 66 5 1 366 820544 820907 2e-69 265 Contig366DeNovoAssembly gi|311103224|ref|NC_014640.1| 79.95 369 66 5 1 366 2208847 2209210 2e-69 265 Contig366DeNovoAssembly gi|311103224|ref|NC_014640.1| 79.95 369 66 5 1 366 5159121 5158758 2e-69 265 Contig2706DeNovoAssembly gi|390137196|gb|AJWD01000108.1| 92.59 54 4 0 1 54 1637 1584 3e-14 78.7 Contig4266DeNovoAssembly gi|60679597|ref|NC_003228.3| 91.84 49 2 2 1 47 3205273 3205225 7e-11 67.6 Contig4266DeNovoAssembly gi|60679597|ref|NC_003228.3| 91.84 49 2 2 1 47 3854412 3854364 7e-11 67.6 Contig4266DeNovoAssembly gi|60679597|ref|NC_003228.3| 91.84 49 2 2 1 47 4413608 4413560 7e-11 67.6 Contig4266DeNovoAssembly gi|60679597|ref|NC_003228.3| 91.84 49 2 2 1 47 4600084 4600036 7e-11 67.6 Contig4266DeNovoAssembly gi|60679597|ref|NC_003228.3| 91.84 49 2 2 1 47 4658009 4657961 7e-11 67.6 Contig4266DeNovoAssembly gi|60679597|ref|NC_003228.3| 91.84 49 2 2 1 47 5128163 5128115 7e-11 67.6
#count entries in new Rickettsia GenBank fasta file
!awk '/>/ { count++ } END { print count }' /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta
11414
!makeblastdb -in /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta -dbtype nucl -out /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227
/bin/sh: 1: makeblastdb: Permission denied
cd /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs
/media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs
ls -l
total 2428056 -rw------- 1 samb samb 376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta -rw------- 1 samb samb 1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr -rw------- 1 samb samb 105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin -rw------- 1 samb samb 92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq -rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta
cd /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs
/media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs
ls -l
total 2428056 -rw------- 1 samb samb 376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta -rw------- 1 samb samb 1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr -rw------- 1 samb samb 105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin -rw------- 1 samb samb 92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq -rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta
ls -l
total 2428056 -rw------- 1 samb samb 376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta -rw------- 1 samb samb 1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr -rw------- 1 samb samb 105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin -rw------- 1 samb samb 92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq -rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta
ls -l
total 2428056 -rw------- 1 samb samb 376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta -rw------- 1 samb samb 1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr -rw------- 1 samb samb 105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin -rw------- 1 samb samb 92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq -rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta
!makeblastdb -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta -dbtype nucl -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227
Building a new DB, current time: 02/28/2014 12:05:12 New DB name: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227 New DB title: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta Sequence type: Nucleotide Keep Linkouts: T Keep MBits: T Maximum file size: 1000000000B Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|167' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|168' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|296' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|297' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|460' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|479' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|480' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|481' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|482' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|483' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|519' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|520' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|521' as it has no sequence data Error: (1431.1) FASTA-Reader: 
Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|522' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|541' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1762' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1763' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1764' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1792' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1795' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1805' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1997' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1998' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|2068' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|2354' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5394' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5471' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5565' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5578' as it has no sequence data 
Adding sequences from FASTA; added 11385 sequences in 51.2601 seconds.
!blastn -db RickettsiaGBnt20140227 -task blastn -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt "6 stitle std" -max_target_seqs 1 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt
!head -10 /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt
gi|288926859|gb|CP000613.2| Rhodospirillum centenum SW, complete genome Contig5DeNovoAssembly gi|288926859|gb|CP000613.2| 90.00 30 3 0 67 96 3744736 3744765 0.071 41.0 gi|327396847|dbj|AB104413.1| Red sea bream iridovirus genomic DNA, circular physical map, complete sequence Contig6DeNovoAssembly gi|327396847|dbj|AB104413.1| 76.09 46 5 1 52 91 102234 102279 3.7 35.6 gi|255529916|ref|NC_013061.1| Pedobacter heparinus DSM 2366 chromosome, complete genome Contig9DeNovoAssembly gi|255529916|ref|NC_013061.1| 86.21 29 4 0 64 92 1205031 1205003 3.6 35.6 gi|18308982|ref|NC_003366.1| Clostridium perfringens str. 13 chromosome, complete genome Contig10DeNovoAssembly gi|18308982|ref|NC_003366.1| 83.33 36 6 0 53 88 1178864 1178899 0.20 39.2 gi|42494965|emb|AJ270058.1| Arabidopsis thaliana DNA chromosome 4, short arm Contig11DeNovoAssembly gi|42494965|emb|AJ270058.1| 89.29 28 3 0 75 102 387185 387212 0.98 37.4 gi|386818599|ref|NZ_JH651379.1| Joostella marina DSM 19592 genomic scaffold Joomascaffold_1, whole genome shotgun sequence Contig13DeNovoAssembly gi|386818599|ref|NZ_JH651379.1| 87.10 31 4 0 3 33 598184 598154 0.071 39.2 gi|552562410|gb|CM000780.3| Zea mays cultivar B73 chromosome 4 Contig15DeNovoAssembly gi|552562410|gb|CM000780.3| 82.05 39 5 1 26 64 229168348 229168384 0.41 37.4 gi|55417891|dbj|BA000014.8| Arabidopsis thaliana DNA, chromosome 3, complete sequence Contig16DeNovoAssembly gi|55417891|dbj|BA000014.8| 89.66 29 1 1 17 43 22640442 22640470 0.25 37.4 gi|32444162|emb|BX294142.1| Rhodopirellula baltica SH 1 complete genome; segment 10/24 Contig17DeNovoAssembly gi|32444162|emb|BX294142.1| 100.00 19 0 0 50 68 112716 112698 1.6 35.6 gi|584450787|emb|HG916852.1| Rhizobium sp. LPU83 main chrosome complete genome Contig19DeNovoAssembly gi|584450787|emb|HG916852.1| 100.00 19 0 0 6 24 1422493 1422475 0.95 35.6
!awk '/>/ { count++ } END { print count }' /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228.fasta
10788
!makeblastdb -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228.fasta -dbtype nucl -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228
Building a new DB, current time: 02/28/2014 15:24:39 New DB name: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228 New DB title: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228.fasta Sequence type: Nucleotide Keep Linkouts: T Keep MBits: T Maximum file size: 1000000000B Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|167' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|168' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|296' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|297' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|460' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|479' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|480' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|481' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|482' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|483' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|519' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|520' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|521' as it has no sequence data Error: (1431.1) FASTA-Reader: 
Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|522' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|541' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1762' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1763' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1764' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1792' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1795' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1805' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1997' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|1998' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|2068' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|2354' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5398' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5475' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5569' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|5582' as it has no sequence data 
Adding sequences from FASTA; added 10759 sequences in 40.3107 seconds.
!blastn -db RickettsiaGBnt20140228 -task blastn -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt "6 stitle std" -max_target_seqs 1 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt