cd /Volumes/Data/blast_db/
/Volumes/Data/blast_db
!makeblastdb -in /Volumes/Data/Sam/fish546_2015/gigasHSrnaSeq/raw_data/Crassostrea_gigas.GCA_000297895.1.24.fa \
-dbtype nucl -parse_seqids
Building a new DB, current time: 04/29/2015 12:04:24 New DB name: /Volumes/Data/Sam/fish546_2015/gigasHSrnaSeq/raw_data/Crassostrea_gigas.GCA_000297895.1.24.fa New DB title: /Volumes/Data/Sam/fish546_2015/gigasHSrnaSeq/raw_data/Crassostrea_gigas.GCA_000297895.1.24.fa Sequence type: Nucleotide Keep Linkouts: T Keep MBits: T Maximum file size: 1000000000B Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: First data line in seq is about 45% ambiguous nucleotides (shouldn't be over 40%) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1139 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1138 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1046 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1781 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1127 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1059 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1458 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1029 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1113 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 2360 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1155 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1233 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1239 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1855 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1379 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 3311 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1001 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: First data line in seq is about 81% ambiguous nucleotides (shouldn't be over 40%) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1113 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1590 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1142 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1071 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1030 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1030 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1071 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1100 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1659 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1561 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1197 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1660 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1042 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1127 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: First data line in seq is about 70% ambiguous nucleotides (shouldn't be over 40%) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1113 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1323 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1072 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1323 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1085 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1225 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1043 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1911 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 1027 characters (max is 1000) Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: Title is very long: 2121 characters (max is 1000) Adding sequences from FASTA; added 28631 sequences in 2.92758 seconds.
!ls Crassostrea_gigas.GCA_000297895.1.24.fa.*
Crassostrea_gigas.GCA_000297895.1.24.fa.nhr Crassostrea_gigas.GCA_000297895.1.24.fa.nsd Crassostrea_gigas.GCA_000297895.1.24.fa.nin Crassostrea_gigas.GCA_000297895.1.24.fa.nsi Crassostrea_gigas.GCA_000297895.1.24.fa.nog Crassostrea_gigas.GCA_000297895.1.24.fa.nsq
cd /Users/Sam/scratch/
/Users/Sam/scratch
!blastn -task blastn \
-query ../Owl/halfshell/EmmaBS400.fa \
-db Crassostrea_gigas.GCA_000297895.1.24.fa \
-outfmt 6 \
-max_target_seqs 1 \
-num_threads 16 \
-out 20150429_gigas_OA_blastn.tab
!head 20150429_gigas_OA_blastn.tab
20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_1 28611 87.10 31 4 0 353 383 121 91 0.065 39.2 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_2 16412 83.72 43 5 2 673 714 1921 1962 0.30 39.2 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_3 27229 92.31 26 2 0 434 459 253 228 0.042 39.2 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_4 27355 95.65 23 1 0 1731 1753 1034 1056 0.51 37.4 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_5 20747 88.00 25 3 0 586 610 822 846 5.1 31.9 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_6 17358 100.00 20 0 0 419 438 676 695 0.11 37.4 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_7 17875 100.00 18 0 0 5 22 1718 1735 1.2 33.7 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_8 16813 71.03 107 19 5 1 107 312 406 0.003 42.8 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_9 19960 76.81 69 7 4 1112 1180 388 447 0.010 42.8 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_10 27511 87.50 32 3 1 287 318 36 66 0.12 37.4
!tail 20150429_gigas_OA_blastn.tab
20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37167 6521 100.00 17 0 0 140 156 660 676 4.8 31.9 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37168 7246 100.00 18 0 0 160 177 1545 1562 1.3 33.7 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37169 9758 100.00 18 0 0 17 34 381 398 1.3 33.7 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37170 14641 100.00 20 0 0 103 122 448 467 0.12 37.4 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37171 7712 64.99 477 159 4 7 479 716 1188 1e-19 96.9 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37172 2671 91.30 23 2 0 375 397 6520 6542 1.4 33.7 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37173 20239 92.00 25 2 0 187 211 474 498 0.11 37.4 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37174 16752 85.71 28 4 0 228 255 178 151 1.2 33.7 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37175 10580 100.00 18 0 0 501 518 2233 2216 1.4 33.7 20150414_trimmed_2212_lane2_400ppm_GCCAAT_contig_37176 10936 89.29 28 3 0 374 401 133 160 0.10 37.4
!wc -l 20150429_gigas_OA_blastn.tab
38307 20150429_gigas_OA_blastn.tab
!grep -c '>' ../Owl/halfshell/EmmaBS400.fa
37176
!cut -f 11 20150429_gigas_OA_blastn.tab | sort -n | head -50
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 sort: write failed: standard output: Broken pipe sort: write error
The code below cuts out the 11th column (-f 11) of our BLAST output file. That column is piped to grep. Grep finds the fixed string specified (-F) and those lines the matches that match the entire line (-x) and counts the lines (-c).
!cut -f 11 20150429_gigas_OA_blastn.tab | grep -cFx '0.0'
33