# Print the current date and time to timestamp this run.
!date
Tue Oct 25 12:30:59 PDT 2016
%%bash
#Report the operating system and kernel details of the computer running these commands.
system_profiler SPSoftwareDataType
Software: System Software Overview: System Version: Mac OS X 10.7.5 (11G63) Kernel Version: Darwin 11.4.2 Boot Volume: SSD2 Boot Mode: Normal Computer Name: greenbird (2) User Name: Sam (Sam) Secure Virtual Memory: Enabled 64-bit Kernel and Extensions: No Time since boot: 32 days 1:12
%%bash
#Uses grep to exclude lines that display the serial number and hardware UUID.
#The pattern is quoted so the shell hands it to grep verbatim; unquoted, [SH][ea]
#is a filename glob and would be replaced by any matching file name (e.g. "Se")
#present in the working directory.
#NOTE: the pattern removes every line containing "Se", "Sa", "He" or "Ha",
#so any line containing the word "Hardware" is dropped as well.
system_profiler SPHardwareDataType | grep -v '[SH][ea]'
Model Name: Mac Pro Model Identifier: MacPro1,1 Processor Name: Dual-Core Intel Xeon Processor Speed: 3 GHz Number of Processors: 2 Total Number of Cores: 4 L2 Cache (per Processor): 4 MB Memory: 14 GB Bus Speed: 1.33 GHz Boot ROM Version: MP11.005C.B08 SMC Version (system): 1.7f10
%%bash
#List the contents of the 20160512 data directory.
ls /Volumes/web/P_generosa_genome_assemblies_BGI/20160512/
151114_I191_FCH3Y35BCXX_L1_wHAIPI023989-79_1.fq.gz.clean.dup.clean.gz 151114_I191_FCH3Y35BCXX_L1_wHAIPI023989-79_2.fq.gz.clean.dup.clean.gz 151114_I191_FCH3Y35BCXX_L2_wHAMPI023988-81_1.fq.gz.clean.dup.clean.gz 151114_I191_FCH3Y35BCXX_L2_wHAMPI023988-81_2.fq.gz.clean.dup.clean.gz 151122_I136_FCH3L2FBBXX_L7_wHAXPI023990-97_1.fq.gz.clean.dup.clean.gz 151122_I136_FCH3L2FBBXX_L7_wHAXPI023990-97_2.fq.gz.clean.dup.clean.gz 20160512_F15FTSUSAT0328_genome_survey.pdf Panopea_generosa.GC_content_vs_depth.png Panopea_generosa.scafSeq Panopea_generosa.scafSeq.zip README md5.txt readme.md
%%bash
#For loop generates an md5 checksum hash value for each .gz file
#and appends the output to the checksums.md5 file in the same directory.
#NOTE: output is appended, so re-running this cell adds duplicate entries.
data_dir=/Volumes/web/P_generosa_genome_assemblies_BGI/20160512
time for file in "$data_dir"/*.gz
do
  #If no .gz file exists the glob is left as a literal string; skip it
  #rather than asking md5 to hash a path that does not exist.
  [[ -e "$file" ]] || continue
  md5 "$file"
done >> "$data_dir/checksums.md5"
real 27m34.914s user 2m13.337s sys 3m31.050s
%%bash
#Counts the reads in each .gz file and keeps a running total.
#The line count of each decompressed file is divided by four because
#Illumina sequencing files are composed of four lines per read.
#Each result is formatted (printf) as the filename, followed by a tab, followed by the readcount.
#The command "tee -a" is used to both print the output to the screen and append the output to the readme.md file.
data_dir=/Volumes/web/P_generosa_genome_assemblies_BGI/20160512
#Initializes the running total.
totalreads=0
time for file in "$data_dir"/*.gz
do
  #If no .gz file exists the glob is left as a literal string; skip it
  #rather than recording a bogus zero-read entry in readme.md.
  [[ -e "$file" ]] || continue
  linecount=$(gunzip -c "$file" | wc -l)
  readcount=$((linecount/4))
  totalreads=$((readcount+totalreads))
  printf "%s\t%s\n" "${file##*/}" "$readcount" | tee -a "$data_dir/readme.md"
done
#Prints the total number of reads after the for loop completes.
echo "$totalreads"
151114_I191_FCH3Y35BCXX_L1_wHAIPI023989-79_1.fq.gz.clean.dup.clean.gz 94337047 151114_I191_FCH3Y35BCXX_L1_wHAIPI023989-79_2.fq.gz.clean.dup.clean.gz 94337047 151114_I191_FCH3Y35BCXX_L2_wHAMPI023988-81_1.fq.gz.clean.dup.clean.gz 72605592 151114_I191_FCH3Y35BCXX_L2_wHAMPI023988-81_2.fq.gz.clean.dup.clean.gz 72605592 151122_I136_FCH3L2FBBXX_L7_wHAXPI023990-97_1.fq.gz.clean.dup.clean.gz 77590326 151122_I136_FCH3L2FBBXX_L7_wHAXPI023990-97_2.fq.gz.clean.dup.clean.gz 77590326 489065930
real 59m30.674s user 28m20.502s sys 5m0.038s
%%bash
#Counts the lines containing ">" in the scaffold sequence file
#(presumably one FASTA header line per scaffold - confirm against the file format).
#grep -c counts matching lines itself, so no separate "wc -l" process is needed;
#the count is printed without the leading padding that wc adds on some systems.
time grep -c ">" /Volumes/web/P_generosa_genome_assemblies_BGI/20160512/Panopea_generosa.scafSeq
1296135
real 0m53.761s user 0m1.544s sys 0m3.809s