#make Anaplasma BLAST database
#check source fasta file for number of sequences
#by counting the header lines in the file, since each sequence entry is
#preceded by a header line that begins with a greater-than symbol ('>').
#the pattern is anchored with '^' so only lines that START with '>' are counted,
#and "count + 0" makes awk print 0 (instead of a blank line) if no headers are found
!awk '/^>/ { count++ } END { print count + 0 }' /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/AnaplasmaGBnt20140305.fasta
12065
#make the Anaplasma BLAST database
#  -dbtype nucl : build a nucleotide database
#  -in          : the source fasta file
#  -out         : path and base name of the new database
#NOTE: entries that have a header but no sequence data are skipped (see the
#"No residues given" messages in the output), so the number of sequences
#added can be lower than the number of '>' headers counted above
#put the "time" command at the beginning for fun (it reports how long the command took to run)
#the "time" command is NOT part of the BLAST package; it is already available in the Terminal
!time makeblastdb -dbtype nucl -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/AnaplasmaGBnt20140305.fasta -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/AnaplasmaGBnt20140305
Building a new DB, current time: 03/12/2014 14:49:21 New DB name: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/AnaplasmaGBnt20140305 New DB title: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/AnaplasmaGBnt20140305.fasta Sequence type: Nucleotide Keep Linkouts: T Keep MBits: T Maximum file size: 1000000000B Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|43' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|44' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|45' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|46' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|587' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|588' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|589' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|590' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|591' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|592' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|593' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|10021' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|10022' as it has no sequence data Error: (1431.1) FASTA-Reader: 
Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|10023' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|10430' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|10431' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|10432' as it has no sequence data Adding sequences from FASTA; added 12048 sequences in 2.09418 seconds. 2.08user 0.02system 0:02.19elapsed 96%CPU (0avgtext+0avgdata 15324maxresident)k 31704inputs+32024outputs (156major+5517minor)pagefaults 0swaps
#check cowdria fasta file numbers
#counts only lines that start with '>' (one header line per sequence entry);
#"count + 0" prints 0 instead of a blank line if no headers are found
!awk '/^>/ { count++ } END { print count + 0 }' /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/CowdriaGBnt20140305.fasta
2289
#make cowdria BLAST database
#builds a nucleotide database (-dbtype nucl) from the cowdria fasta file (-in)
#and writes it using the path and base name given to -out;
#"time" reports how long the command took to run
!time makeblastdb -dbtype nucl -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/CowdriaGBnt20140305.fasta -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/CowdriaGBnt20140305
Building a new DB, current time: 03/12/2014 14:53:33 New DB name: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/CowdriaGBnt20140305 New DB title: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/CowdriaGBnt20140305.fasta Sequence type: Nucleotide Keep Linkouts: T Keep MBits: T Maximum file size: 1000000000B Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|116' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|421' as it has no sequence data Adding sequences from FASTA; added 2287 sequences in 0.763971 seconds. 0.76user 0.01system 0:00.77elapsed 99%CPU (0avgtext+0avgdata 12408maxresident)k 0inputs+13152outputs (0major+3981minor)pagefaults 0swaps
#check ehrlichia fasta file numbers
#counts only lines that start with '>' (one header line per sequence entry);
#"count + 0" prints 0 instead of a blank line if no headers are found
!awk '/^>/ { count++ } END { print count + 0 }' /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/EhrlichiaGBnt20140305.fasta
3020
#make ehrlichia BLAST database
#builds a nucleotide database (-dbtype nucl) from the ehrlichia fasta file (-in)
#and writes it using the path and base name given to -out;
#"time" reports how long the command took to run
!time makeblastdb -dbtype nucl -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/EhrlichiaGBnt20140305.fasta -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/EhrlichiaGBnt20140305
Building a new DB, current time: 03/12/2014 14:56:01 New DB name: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/EhrlichiaGBnt20140305 New DB title: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/EhrlichiaGBnt20140305.fasta Sequence type: Nucleotide Keep Linkouts: T Keep MBits: T Maximum file size: 1000000000B Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|203' as it has no sequence data Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given Ignoring sequence 'lcl|206' as it has no sequence data Adding sequences from FASTA; added 3018 sequences in 1.93499 seconds. 1.92user 0.02system 0:01.94elapsed 100%CPU (0avgtext+0avgdata 19968maxresident)k 0inputs+35296outputs (0major+10541minor)pagefaults 0swaps
#perform BLASTN of de novo assembly of all abalone seqs
#against anaplasma BLAST database
#NOTE: this first attempt fails with a usage error because no database name
#follows the -db option; it is kept here as a record, and the corrected
#command is run in the next step
!time blastn -db -task blastn -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AllAbDenovo7118contigs.fa -outfmt "6 stitle std" -max_target_seqs 3 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AllAbDenovo7118contigsAnaplasmaGBntBLASTN.txt
USAGE blastn [-h] [-help] [-import_search_strategy filename] [-export_search_strategy filename] [-task task_name] [-db database_name] [-dbsize num_letters] [-gilist filename] [-seqidlist filename] [-negative_gilist filename] [-entrez_query entrez_query] [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm] [-subject subject_input_file] [-subject_loc range] [-query input_file] [-out output_file] [-evalue evalue] [-word_size int_value] [-gapopen open_penalty] [-gapextend extend_penalty] [-perc_identity float_value] [-xdrop_ungap float_value] [-xdrop_gap float_value] [-xdrop_gap_final float_value] [-searchsp int_value] [-max_hsps int_value] [-sum_statistics] [-penalty penalty] [-reward reward] [-no_greedy] [-min_raw_gapped_score int_value] [-template_type type] [-template_length int_value] [-dust DUST_options] [-filtering_db filtering_database] [-window_masker_taxid window_masker_taxid] [-window_masker_db window_masker_db] [-soft_masking soft_masking] [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value] [-best_hit_score_edge float_value] [-window_size int_value] [-off_diagonal_range int_value] [-use_index boolean] [-index_name string] [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines] [-outfmt format] [-show_gis] [-num_descriptions int_value] [-num_alignments int_value] [-html] [-max_target_seqs num_sequences] [-num_threads int_value] [-remote] [-version] DESCRIPTION Nucleotide-Nucleotide BLAST 2.2.29+ Use '-help' to print detailed descriptions of command line arguments ======================================================================== Error: Too many positional arguments (1), the offending value: blastn Command exited with non-zero status 1 0.01user 0.01system 0:00.13elapsed 21%CPU (0avgtext+0avgdata 11124maxresident)k 43552inputs+0outputs (219major+2645minor)pagefaults 0swaps
#forgot to specify BLAST db
#re-run the BLASTN against the anaplasma BLAST database, this time giving the database path after -db
#  -db              : the BLAST database to search
#  -task blastn     : the BLAST task to run
#  -query           : fasta file of the abalone de novo assembly contigs
#  -outfmt          : output format; "6 stitle std" is presumably tabular output with the
#                     subject title followed by the standard columns (confirm with blastn -help)
#  -max_target_seqs : number of database sequences to report (3 here)
#  -num_threads     : number of threads to use (16 here)
#  -out             : file that the results are written to
!time blastn -db /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/AnaplasmaGBnt20140305 -task blastn -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AllAbDenovo7118contigs.fa -outfmt "6 stitle std" -max_target_seqs 3 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AllAbDenovo7118contigsAnaplasmaGBntBLASTN.txt
90.64user 0.86system 0:29.70elapsed 308%CPU (0avgtext+0avgdata 67624maxresident)k 6224inputs+7688outputs (17major+371373minor)pagefaults 0swaps
#perform BLASTN of de novo assembly of all abalone seqs
#against cowdria BLAST database
#same query file and options as the anaplasma search; only the database (-db)
#and the output file (-out) are different
!time blastn -db /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/CowdriaGBnt20140305 -task blastn -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AllAbDenovo7118contigs.fa -outfmt "6 stitle std" -max_target_seqs 3 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AllAbDenovo7118contigsCowdriaGBntBLASTN.txt
100.50user 2.37system 0:36.14elapsed 284%CPU (0avgtext+0avgdata 62192maxresident)k 0inputs+16328outputs (0major+415944minor)pagefaults 0swaps
#perform BLASTN of de novo assembly of all abalone seqs
#against ehrlichia BLAST database
#same query file and options as the anaplasma search; only the database (-db)
#and the output file (-out) are different
!time blastn -db /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/EhrlichiaGBnt20140305 -task blastn -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AllAbDenovo7118contigs.fa -outfmt "6 stitle std" -max_target_seqs 3 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AllAbDenovo7118contigsEhrlichiaGBntBLASTN.txt
136.71user 1.80system 0:43.49elapsed 318%CPU (0avgtext+0avgdata 72420maxresident)k 0inputs+12312outputs (0major+587148minor)pagefaults 0swaps