See this GitHub Issue.
This notebook utilized files generated on 20220419 (Notebook entry).
%%bash
# Record when and where this notebook was run: date, OS release, hostname,
# CPU details and memory. Section headings are separated by a dashed rule.
divider="------------"

printf '%s\n' "TODAY'S DATE"
date
printf '%s\n\n' "${divider}"

# Distribution / release information
lsb_release -a
printf '\n%s\n%s\n' "${divider}" "HOSTNAME: "

hostname
printf '\n%s\n%s\n\n' "${divider}" "Computer Specs:"

# CPU architecture details
lscpu
printf '\n%s\n\n%s\n\n' "${divider}" "Memory Specs"

# Memory usage in human-readable units
free -mh
TODAY'S DATE Fri Mar 24 08:44:37 AM PDT 2023 ------------ Distributor ID: Ubuntu Description: Ubuntu 22.04.2 LTS Release: 22.04 Codename: jammy ------------ HOSTNAME: computer ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Address sizes: 45 bits physical, 48 bits virtual Byte Order: Little Endian CPU(s): 4 On-line CPU(s) list: 0-3 Vendor ID: GenuineIntel Model name: Intel(R) Core(TM) i9-10885H CPU @ 2.40GHz CPU family: 6 Model: 165 Thread(s) per core: 1 Core(s) per socket: 1 Socket(s): 4 Stepping: 2 BogoMIPS: 4800.05 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon nopl xtopology tsc_reliable nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 invpcid rdseed adx smap clflushopt xsaveopt xsavec xgetbv1 xsaves arat flush_l1d arch_capabilities Hypervisor vendor: VMware Virtualization type: full L1d cache: 128 KiB (4 instances) L1i cache: 128 KiB (4 instances) L2 cache: 1 MiB (4 instances) L3 cache: 64 MiB (4 instances) NUMA node(s): 1 NUMA node0 CPU(s): 0-3 Vulnerability Itlb multihit: KVM: Mitigation: VMX unsupported Vulnerability L1tf: Mitigation; PTE Inversion Vulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown Vulnerability Meltdown: Mitigation; PTI Vulnerability Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown Vulnerability Retbleed: Mitigation; IBRS Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization Vulnerability Spectre v2: Mitigation; IBRS, IBPB conditional, RSB filling, 
PBRSB-eIBRS Not affected Vulnerability Srbds: Unknown: Dependent on hypervisor status Vulnerability Tsx async abort: Not affected ------------ Memory Specs total used free shared buff/cache available Mem: 54Gi 2.1Gi 50Gi 184Mi 2.2Gi 51Gi Swap: 2.0Gi 0B 2.0Gi
No LSB modules are available.
`%env` indicates a bash variable; a line without `%env` is a Python variable.
######################################################################
### Set directories
# Each %env line sets an environment variable; the later %%bash cells read
# these as "${data_dir}", "${analysis_dir}", etc.
# NOTE(review): data_dir is set here but not referenced by any cell visible in this notebook section - confirm it is still needed.
%env data_dir=/home/sam/data/P_generosa/genomes
%env analysis_dir=/home/sam/analyses/20230322-pgen-gene_annotation-update
# Same path again as a plain assignment (no %env), i.e. a variable in the notebook kernel rather than the environment.
analysis_dir="/home/sam/analyses/20230322-pgen-gene_annotation-update"
#####################################################################
### Input files
# Remote directory the two input files are downloaded from (joined as "${base_url}/<file name>")
%env base_url=https://gannet.fish.washington.edu/Atumefaciens/20220419-pgen-gene_annotation_mapping
# UniProt batch results
%env uniprot_output=20220419-pgen-uniprot_batch-results.txt
# Genome IDs and SPIDs
%env genome_IDs_SPIDs=Panopea-generosa-v1.0.a4-blast-diamond-functional-genome_IDs-SPIDs.txt
######################################################################
### Output files
# Parsed UniProt
# (tab-delimited file written by the UniProt record parsing cell)
%env parsed_uniprot=20230322-pgen-accession-gene_name-gene_description-go_ids.tab
# Final output
# (parsed UniProt lines prefixed with the genome gene ID, written by the awk join cell)
%env joined_output=20230322-pgen-gene-accessions-gene_id-gene_name-gene_description-alt_gene_description-all_go_ids-C_go_ids-P_go_ids-F_go_ids.tab
######################################################################
env: data_dir=/home/sam/data/P_generosa/genomes env: analysis_dir=/home/sam/analyses/20230322-pgen-gene_annotation-update env: base_url=https://gannet.fish.washington.edu/Atumefaciens/20220419-pgen-gene_annotation_mapping env: uniprot_output=20220419-pgen-uniprot_batch-results.txt env: genome_IDs_SPIDs=Panopea-generosa-v1.0.a4-blast-diamond-functional-genome_IDs-SPIDs.txt env: parsed_uniprot=20230322-pgen-accession-gene_name-gene_description-go_ids.tab env: joined_output=20230322-pgen-gene-accessions-gene_id-gene_name-gene_description-alt_gene_description-all_go_ids-C_go_ids-P_go_ids-F_go_ids.tab
%%bash
# Create the analysis directory (plus any missing parents) unless it is already there
[[ -d "${analysis_dir}" ]] || mkdir -p "${analysis_dir}"
`--quiet`: Prevents `wget` output from overwhelming the Jupyter Notebook.
`--continue`: If a download was previously initiated, it continues where it left off and will not create a second file if one already exists.
%%bash
# Download the two input files into the analysis directory, then list the
# directory and preview the first 25 lines of every .txt file in it.

# Stop if the analysis directory is missing; otherwise the downloads would
# land in whatever directory the cell happened to start in.
cd "${analysis_dir}" || exit 1

# --quiet hides wget's own messages, so a failed download has to be reported
# explicitly; later cells depend on both files being present.
for input_file in "${uniprot_output}" "${genome_IDs_SPIDs}"; do
  if ! wget --quiet --continue "${base_url}/${input_file}"; then
    echo "ERROR: download failed: ${base_url}/${input_file}" >&2
    exit 1
  fi
done

# Oldest-first long listing with human-readable sizes
ls -ltrh
echo ""
echo "---------------------------------------------------------"
echo ""
# "--" keeps a file name that starts with "-" from being read as an option
head -n 25 -- *.txt
total 138M -rw-rw-r-- 1 sam sam 359K Apr 20 2022 Panopea-generosa-v1.0.a4-blast-diamond-functional-genome_IDs-SPIDs.txt -rw-rw-r-- 1 sam sam 138M Apr 20 2022 20220419-pgen-uniprot_batch-results.txt --------------------------------------------------------- ==> 20220419-pgen-uniprot_batch-results.txt <== ID CAMT1_DICDI Reviewed; 230 AA. AC Q86IC9; Q552T5; DT 05-MAY-2009, integrated into UniProtKB/Swiss-Prot. DT 01-JUN-2003, sequence version 1. DT 23-FEB-2022, entry version 92. DE RecName: Full=Probable caffeoyl-CoA O-methyltransferase 1; DE EC=2.1.1.104; DE AltName: Full=O-methyltransferase 5; GN Name=omt5; ORFNames=DDB_G0275499; OS Dictyostelium discoideum (Slime mold). OC Eukaryota; Amoebozoa; Evosea; Eumycetozoa; Dictyostelia; Dictyosteliales; OC Dictyosteliaceae; Dictyostelium. OX NCBI_TaxID=44689; RN [1] RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. RC STRAIN=AX4; RX PubMed=12097910; DOI=10.1038/nature00847; RA Gloeckner G., Eichinger L., Szafranski K., Pachebat J.A., Bankier A.T., RA Dear P.H., Lehmann R., Baumgart C., Parra G., Abril J.F., Guigo R., RA Kumpf K., Tunggal B., Cox E.C., Quail M.A., Platzer M., Rosenthal A., RA Noegel A.A.; RT "Sequence and analysis of chromosome 2 of Dictyostelium discoideum."; RL Nature 418:79-85(2002). RN [2] RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. ==> Panopea-generosa-v1.0.a4-blast-diamond-functional-genome_IDs-SPIDs.txt <== Q86IC9 PGEN_.00g000010 P04177 PGEN_.00g000020 Q8L840 PGEN_.00g000050 Q61043 PGEN_.00g000060 A1E2V0 PGEN_.00g000080 P34456 PGEN_.00g000090 P34457 PGEN_.00g000120 O00463 PGEN_.00g000210 Q00945 PGEN_.00g000230 Q5SWK7 PGEN_.00g000240 Q8ZXT3 PGEN_.00g000280 Q5REG4 PGEN_.00g000300 Q8QG60 PGEN_.00g000380 Q9H583 PGEN_.00g000440 A0JMR6 PGEN_.00g000450 O88917 PGEN_.00g000460 Q7D513 PGEN_.00g000490 Q92968 PGEN_.00g000520 A6H769 PGEN_.00g000530 Q14676 PGEN_.00g000540 Q54W11 PGEN_.00g000560 Q9FKK7 PGEN_.00g000600 Q3ZCD7 PGEN_.00g000660 Q9D180 PGEN_.00g000670 Q9D6Z0 PGEN_.00g000680
Print the first entry (the end of each entry is denoted by a line beginning with `//`).
Let's break it down step by step:
grep -n "^//"
- This command searches for all lines in the file that begin with `//` and uses the -n flag to include line numbers in the output.
head -n 1
- This command takes the first line of the output from grep, which holds the line number of the first line that begins with `//`.
cut -d ":" -f 1
- This command extracts the line number from the output of head by splitting the output at the colon (:) and selecting the first field.
xargs -I {} head -n {}
- This command uses the line number as an argument for the head command, which prints the first n lines of a file. The xargs command is used to pass the line number to head as an argument.
This command will print all lines in ${uniprot_output}
up to the first line that begins with //
.
Counting accessions:
grep -c "^AC"
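Counts accession lines (beginning with `AC`). A single record's accessions can span more than one `AC` line, so this count can be larger than the number of records.
%%bash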
cd "${analysis_dir}"

# Show the first UniProt record: find the line number of the first "//"
# terminator, then print the file from the top through that line.
grep -n "^//" "${uniprot_output}" | head -n 1 | cut -d ":" -f 1 \
  | xargs -I {} head -n {} "${uniprot_output}"

# Blank line, rule, blank line, heading, blank line
printf '\n%s\n\n%s\n\n' \
  "----------------------------------------------------" \
  "Number of accessions:"

# Count the lines that start with the AC (accession) line code
grep -c "^AC" "${uniprot_output}"
ID CAMT1_DICDI Reviewed; 230 AA. AC Q86IC9; Q552T5; DT 05-MAY-2009, integrated into UniProtKB/Swiss-Prot. DT 01-JUN-2003, sequence version 1. DT 23-FEB-2022, entry version 92. DE RecName: Full=Probable caffeoyl-CoA O-methyltransferase 1; DE EC=2.1.1.104; DE AltName: Full=O-methyltransferase 5; GN Name=omt5; ORFNames=DDB_G0275499; OS Dictyostelium discoideum (Slime mold). OC Eukaryota; Amoebozoa; Evosea; Eumycetozoa; Dictyostelia; Dictyosteliales; OC Dictyosteliaceae; Dictyostelium. OX NCBI_TaxID=44689; RN [1] RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. RC STRAIN=AX4; RX PubMed=12097910; DOI=10.1038/nature00847; RA Gloeckner G., Eichinger L., Szafranski K., Pachebat J.A., Bankier A.T., RA Dear P.H., Lehmann R., Baumgart C., Parra G., Abril J.F., Guigo R., RA Kumpf K., Tunggal B., Cox E.C., Quail M.A., Platzer M., Rosenthal A., RA Noegel A.A.; RT "Sequence and analysis of chromosome 2 of Dictyostelium discoideum."; RL Nature 418:79-85(2002). RN [2] RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. 
RC STRAIN=AX4; RX PubMed=15875012; DOI=10.1038/nature03481; RA Eichinger L., Pachebat J.A., Gloeckner G., Rajandream M.A., Sucgang R., RA Berriman M., Song J., Olsen R., Szafranski K., Xu Q., Tunggal B., RA Kummerfeld S., Madera M., Konfortov B.A., Rivero F., Bankier A.T., RA Lehmann R., Hamlin N., Davies R., Gaudet P., Fey P., Pilcher K., Chen G., RA Saunders D., Sodergren E.J., Davis P., Kerhornou A., Nie X., Hall N., RA Anjard C., Hemphill L., Bason N., Farbrother P., Desany B., Just E., RA Morio T., Rost R., Churcher C.M., Cooper J., Haydock S., van Driessche N., RA Cronin A., Goodhead I., Muzny D.M., Mourier T., Pain A., Lu M., Harper D., RA Lindsay R., Hauser H., James K.D., Quiles M., Madan Babu M., Saito T., RA Buchrieser C., Wardroper A., Felder M., Thangavelu M., Johnson D., RA Knights A., Loulseged H., Mungall K.L., Oliver K., Price C., Quail M.A., RA Urushihara H., Hernandez J., Rabbinowitsch E., Steffen D., Sanders M., RA Ma J., Kohara Y., Sharp S., Simmonds M.N., Spiegler S., Tivey A., RA Sugano S., White B., Walker D., Woodward J.R., Winckler T., Tanaka Y., RA Shaulsky G., Schleicher M., Weinstock G.M., Rosenthal A., Cox E.C., RA Chisholm R.L., Gibbs R.A., Loomis W.F., Platzer M., Kay R.R., RA Williams J.G., Dear P.H., Noegel A.A., Barrell B.G., Kuspa A.; RT "The genome of the social amoeba Dictyostelium discoideum."; RL Nature 435:43-57(2005). CC -!- CATALYTIC ACTIVITY: CC Reaction=(E)-caffeoyl-CoA + S-adenosyl-L-methionine = (E)-feruloyl-CoA CC + H(+) + S-adenosyl-L-homocysteine; Xref=Rhea:RHEA:16925, CC ChEBI:CHEBI:15378, ChEBI:CHEBI:57856, ChEBI:CHEBI:59789, CC ChEBI:CHEBI:87136, ChEBI:CHEBI:87305; EC=2.1.1.104; CC -!- SIMILARITY: Belongs to the class I-like SAM-binding methyltransferase CC superfamily. Cation-dependent O-methyltransferase family. CCoAMT CC subfamily. {ECO:0000255|PROSITE-ProRule:PRU01019}. 
CC --------------------------------------------------------------------------- CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms CC Distributed under the Creative Commons Attribution (CC BY 4.0) License CC --------------------------------------------------------------------------- DR EMBL; AAFI02000013; EAL69500.1; -; Genomic_DNA. DR RefSeq; XP_643596.1; XM_638504.1. DR SMR; Q86IC9; -. DR STRING; 44689.DDB0229910; -. DR PaxDb; Q86IC9; -. DR EnsemblProtists; EAL69500; EAL69500; DDB_G0275499. DR GeneID; 8620183; -. DR KEGG; ddi:DDB_G0275499; -. DR dictyBase; DDB_G0275499; omt5. DR eggNOG; KOG1663; Eukaryota. DR HOGENOM; CLU_067676_5_1_1; -. DR InParanoid; Q86IC9; -. DR OMA; MGEHPRL; -. DR PhylomeDB; Q86IC9; -. DR PRO; PR:Q86IC9; -. DR Proteomes; UP000002195; Chromosome 2. DR GO; GO:0042409; F:caffeoyl-CoA O-methyltransferase activity; IEA:UniProtKB-EC. DR GO; GO:0046872; F:metal ion binding; IEA:UniProtKB-KW. DR GO; GO:0008757; F:S-adenosylmethionine-dependent methyltransferase activity; IBA:GO_Central. DR GO; GO:0032259; P:methylation; IEA:UniProtKB-KW. DR InterPro; IPR029063; SAM-dependent_MTases. DR InterPro; IPR002935; SAM_O-MeTrfase. DR Pfam; PF01596; Methyltransf_3; 1. DR SUPFAM; SSF53335; SSF53335; 1. DR PROSITE; PS51682; SAM_OMT_I; 1. PE 3: Inferred from homology; KW Metal-binding; Methyltransferase; Reference proteome; KW S-adenosyl-L-methionine; Transferase. 
FT CHAIN 1..230 FT /note="Probable caffeoyl-CoA O-methyltransferase 1" FT /id="PRO_0000371324" FT REGION 76..77 FT /note="S-adenosyl-L-methionine binding" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT METAL 151 FT /note="Divalent metal cation" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT METAL 177 FT /note="Divalent metal cation" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT METAL 178 FT /note="Divalent metal cation" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT BINDING 52 FT /note="S-adenosyl-L-methionine; via amide nitrogen" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT BINDING 74 FT /note="S-adenosyl-L-methionine" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT BINDING 82 FT /note="S-adenosyl-L-methionine" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT BINDING 100 FT /note="S-adenosyl-L-methionine" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT BINDING 129 FT /note="S-adenosyl-L-methionine; via amide nitrogen" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT BINDING 151 FT /note="S-adenosyl-L-methionine" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT BINDING 153 FT /note="S-adenosyl-L-methionine" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" FT BINDING 160 FT /note="S-adenosyl-L-methionine" FT /evidence="ECO:0000255|PROSITE-ProRule:PRU01019" SQ SEQUENCE 230 AA; 25745 MW; CA6C8EE6FCA0E457 CRC64; MEKTTPTQYD VKVQYNNSIL NYAIDHSDQL TDIQKELIQF TKENIERHIM LTQAEQCSFF KLLIQVLNAK KTIDIGVFTG LSSLTAALAM GDEGRVVACD VSTDYTQHAL KFWAKAGVDH KINLKIQPAS KTLQELIDQG EENTYDFVFI DADKTGYDTY YELSLKLIRK GGIIAIDNVL QHGRVADPNA NVEPNLVAIR ALNDKILADK RVTKTMLPIA DGITLVTKIN // ---------------------------------------------------- Number of accessions: 10634
UniProt accession
Gene name/abbreviation
Gene description
GO IDs
GO aspect (cellular component C
, molecular function F
, and biological process P
)
Checks lines beginning with DE
to identify values in the 2nd field with Name
in them.
Identifies unique values. This will determine how to parse properly after this.
%%bash
cd "${analysis_dir}"

# List the distinct second-field values containing "Name" on lines that
# start with DE; these are the name categories the parser has to handle.
awk '/^DE/ && $2 ~ /Name/ { print $2 }' "${uniprot_output}" | sort -u
AltName: RecName:
%%bash
#######################################
# Convert a UniProt flat-file into one tab-delimited line per record.
#
# Output columns (a record is emitted when its "//" terminator is read):
#   1. accessions, ";"-joined (all AC lines of the record)
#   2. NCBI GeneID (last "DR GeneID;" line of the record)
#   3. gene name (last "GN Name=" line of the record)
#   4. gene description (RecName of the protein itself, commas removed)
#   5. alternate gene description (last AltName of the protein itself)
#   6. all GO IDs, ";"-joined
#   7. GO IDs with aspect C
#   8. GO IDs with aspect P
#   9. GO IDs with aspect F
#
# Fixes relative to the previous per-line bash loop:
#   - the alternate description is cleared after every record, so it no
#     longer leaks into later records that have no AltName line
#   - accessions spread over several AC lines are joined with ";"
#   - RecName/AltName lines inside "Contains:" / "Includes:" subsections no
#     longer overwrite the names of the protein itself
#   - a single awk pass replaces several awk/sed processes per input line
#
# Arguments: $1 - path to the UniProt flat-file
# Outputs:   tab-delimited records on stdout
# Returns:   exit status of awk
#######################################
parse_uniprot_records() {
  awk '
    # Text between the first "=" and the next "=" or "{" of a DE line,
    # without trailing blanks, commas or a trailing semi-colon.
    function de_value(line,    parts, value) {
      split(line, parts, /[={]/)
      value = parts[2]
      sub(/[ \t]+$/, "", value)
      gsub(/,/, "", value)
      sub(/;$/, "", value)
      return value
    }

    # Append item to a ";"-delimited list.
    function append(list, item) {
      return (list == "" ? item : list ";" item)
    }

    # Clear every per-record value.
    function reset_record() {
      accessions = gene_id = gene = ""
      gene_description = alt_gene_description = ""
      go_all = go_c = go_f = go_p = ""
      in_subsection = 0
    }

    BEGIN { OFS = "\t"; reset_record() }

    # GO cross-reference, e.g. "DR GO; GO:0042409; F:some term; IEA:src."
    $1 == "DR" && $2 == "GO;" {
      go_id = $3
      sub(/;/, "", go_id)
      go_all = append(go_all, go_id)
      # Aspect letter is whatever precedes the first ":" of the 4th field
      split($4, aspect_parts, ":")
      if (aspect_parts[1] == "C")      go_c = append(go_c, go_id)
      else if (aspect_parts[1] == "F") go_f = append(go_f, go_id)
      else if (aspect_parts[1] == "P") go_p = append(go_p, go_id)
      next
    }

    # NCBI gene ID cross-reference; strip the trailing semi-colon
    $1 == "DR" && $2 == "GeneID;" {
      gene_id = $3
      sub(/;$/, "", gene_id)
      next
    }

    # Everything after these headers names a contained/included component,
    # not the protein itself.
    $1 == "DE" && ($2 == "Contains:" || $2 == "Includes:") {
      in_subsection = 1
      next
    }

    $1 == "DE" && $2 == "RecName:" && !in_subsection {
      gene_description = de_value($0)
      next
    }

    $1 == "DE" && $2 == "AltName:" && !in_subsection {
      alt_gene_description = de_value($0)
      next
    }

    # Gene name: text after "Name=" up to the first "{" or ";".
    # Any amount of blank space between "GN" and "Name=" is accepted.
    /^GN[ \t]+Name=/ {
      gene = $0
      sub(/^GN[ \t]+Name=/, "", gene)
      cut_at = match(gene, /[{;]/)
      if (cut_at) gene = substr(gene, 1, cut_at - 1)
      sub(/[ \t]+$/, "", gene)
      next
    }

    # Accessions: drop the line code and all blanks, strip the trailing
    # semi-colon, then add to the list kept for this record.
    $1 == "AC" {
      accession = $0
      sub(/^AC[ \t]+/, "", accession)
      gsub(/[ \t]/, "", accession)
      sub(/;$/, "", accession)
      accessions = append(accessions, accession)
      next
    }

    # End of record: emit it and start over
    $1 == "//" {
      print accessions, gene_id, gene, gene_description, \
            alt_gene_description, go_all, go_c, go_p, go_f
      reset_record()
    }
  ' "$1"
}

#######################################
# Parse "${uniprot_output}" into "${parsed_uniprot}" inside the analysis
# directory and report the elapsed time.
# Globals: analysis_dir, uniprot_output, parsed_uniprot (all read)
# Returns: non-zero if the directory cannot be entered or parsing fails
#######################################
run_uniprot_parse() {
  # Do not write the output file anywhere else if the directory is missing
  cd "${analysis_dir}" || return 1
  # ">" (not ">>") so that re-running the cell replaces the previous
  # output instead of appending duplicate records to it
  time parse_uniprot_records "${uniprot_output}" > "${parsed_uniprot}"
}

run_uniprot_parse
real 293m48.623s user 298m10.453s sys 41m57.464s
%%bash
cd "${analysis_dir}"

# How many records were written
wc -l "${parsed_uniprot}"

# Blank line, rule, blank line
printf '\n%s\n\n' "------------------------------------------------------------------"

# First 25 records, aligned into columns for reading
head -n 25 "${parsed_uniprot}" | column -t
10304 20230322-pgen-accession-gene_name-gene_description-go_ids.tab ------------------------------------------------------------------ Q86IC9;Q552T5 8620183 omt5 Probable caffeoyl-CoA O-methyltransferase 1 O-methyltransferase 5 GO:0042409;GO:0046872;GO:0008757;GO:0032259 GO:0032259 GO:0042409;GO:0046872;GO:0008757 P04177 25085 Th Tyrosine 3-monooxygenase Tyrosine 3-hydroxylase GO:0030424;GO:0005737;GO:0009898;GO:0031410;GO:0030659;GO:0005829;GO:0030425;GO:0033162;GO:0005739;GO:0043005;GO:0043025;GO:0005634;GO:0043204;GO:0048471;GO:0005790;GO:0008021;GO:0043195;GO:0016597;GO:0035240;GO:0019899;GO:0008199;GO:0008198;GO:0042802;GO:0004497;GO:0019825;GO:0019904;GO:0034617;GO:0004511;GO:0015842;GO:0009887;GO:0042423;GO:0071312;GO:0071333;GO:0071363;GO:0071287;GO:0071316;GO:0071466;GO:0021987;GO:0042745;GO:0050890;GO:0042416;GO:0006585;GO:0042755;GO:0048596;GO:0042418;GO:0042462;GO:0006631;GO:0016137;GO:0007507;GO:1990384;GO:0033076;GO:0007612;GO:0007626;GO:0007617;GO:0007613;GO:0010259;GO:0042136;GO:0042421;GO:0018963;GO:0052314;GO:0008016;GO:0014823;GO:0001975;GO:0051412;GO:0051602;GO:0032355;GO:0045471;GO:0045472;GO:0070848;GO:0009635;GO:0001666;GO:0035902;GO:0017085;GO:0035900;GO:0009416;GO:0032496;GO:0010038;GO:0035094;GO:0031667;GO:0014070;GO:0043434;GO:0046684;GO:0009651;GO:0048545;GO:0009414;GO:0009410;GO:0010043;GO:0007605;GO:0035176;GO:0006665;GO:0001963;GO:0042214;GO:0007601 GO:0030424;GO:0005737;GO:0009898;GO:0031410;GO:0030659;GO:0005829;GO:0030425;GO:0033162;GO:0005739;GO:0043005;GO:0043025;GO:0005634;GO:0043204;GO:0048471;GO:0005790;GO:0008021;GO:0043195 
GO:0015842;GO:0009887;GO:0042423;GO:0071312;GO:0071333;GO:0071363;GO:0071287;GO:0071316;GO:0071466;GO:0021987;GO:0042745;GO:0050890;GO:0042416;GO:0006585;GO:0042755;GO:0048596;GO:0042418;GO:0042462;GO:0006631;GO:0016137;GO:0007507;GO:1990384;GO:0033076;GO:0007612;GO:0007626;GO:0007617;GO:0007613;GO:0010259;GO:0042136;GO:0042421;GO:0018963;GO:0052314;GO:0008016;GO:0014823;GO:0001975;GO:0051412;GO:0051602;GO:0032355;GO:0045471;GO:0045472;GO:0070848;GO:0009635;GO:0001666;GO:0035902;GO:0017085;GO:0035900;GO:0009416;GO:0032496;GO:0010038;GO:0035094;GO:0031667;GO:0014070;GO:0043434;GO:0046684;GO:0009651;GO:0048545;GO:0009414;GO:0009410;GO:0010043;GO:0007605;GO:0035176;GO:0006665;GO:0001963;GO:0042214;GO:0007601 GO:0016597;GO:0035240;GO:0019899;GO:0008199;GO:0008198;GO:0042802;GO:0004497;GO:0019825;GO:0019904;GO:0034617;GO:0004511 Q8L840;O04092;Q9FT71 837636 RECQL4A ATP-dependent DNA helicase Q-like 4A SGS1-like protein GO:0005694;GO:0005737;GO:0005634;GO:0009506;GO:0043138;GO:0005524;GO:0016887;GO:0009378;GO:0046872;GO:0003676;GO:0071215;GO:0070417;GO:0006974;GO:0051276;GO:0032508;GO:0006310;GO:0006281;GO:0006268;GO:0000724 GO:0005694;GO:0005737;GO:0005634;GO:0009506 GO:0071215;GO:0070417;GO:0006974;GO:0051276;GO:0032508;GO:0006310;GO:0006281;GO:0006268;GO:0000724 GO:0043138;GO:0005524;GO:0016887;GO:0009378;GO:0046872;GO:0003676 Q61043;A0A1Y7VJL5;B2RQ73;B7ZMZ9;E9Q488;E9Q4S3;Q674R4;Q6ZPM7 18080 Nin Ninein SGS1-like protein GO:0045177;GO:0030424;GO:0044295;GO:0120103;GO:0005814;GO:0005813;GO:0097539;GO:0005881;GO:0030425;GO:0072686;GO:0097431;GO:0005730;GO:0005654;GO:0000242;GO:0005886;GO:0000922;GO:0005509;GO:0005525;GO:0019900;GO:0051011;GO:0010457;GO:0051642;GO:0090222;GO:0048668;GO:0021540;GO:0021957;GO:0034454;GO:0050772;GO:0031116;GO:0008104 GO:0045177;GO:0030424;GO:0044295;GO:0120103;GO:0005814;GO:0005813;GO:0097539;GO:0005881;GO:0030425;GO:0072686;GO:0097431;GO:0005730;GO:0005654;GO:0000242;GO:0005886;GO:0000922 
GO:0010457;GO:0051642;GO:0090222;GO:0048668;GO:0021540;GO:0021957;GO:0034454;GO:0050772;GO:0031116;GO:0008104 GO:0005509;GO:0005525;GO:0019900;GO:0051011 A1E2V0 489433 BIRC3 Baculoviral IAP repeat-containing protein 3 RING-type E3 ubiquitin transferase BIRC3 GO:0005737;GO:0005829;GO:0005654;GO:0005634;GO:0043027;GO:0046872;GO:0061630;GO:0043066;GO:0060546;GO:0031398;GO:0051726 GO:0005737;GO:0005829;GO:0005654;GO:0005634 GO:0043066;GO:0060546;GO:0031398;GO:0051726 GO:0043027;GO:0046872;GO:0061630 P34456 186266 Uncharacterized protein F54H12.2 RING-type E3 ubiquitin transferase BIRC3 GO:0005829;GO:0004748;GO:0009263 GO:0005829 GO:0009263 GO:0004748 P34457 Putative uncharacterized transposon-derived protein F54H12.3 RING-type E3 ubiquitin transferase BIRC3 GO:0003676;GO:0015074 GO:0015074 GO:0003676 O00463;B4DIS9;B4E0A2;Q6FHY1 7188 TRAF5 TNF receptor-associated factor 5 RING finger protein 84 GO:0035631;GO:0005813;GO:0009898;GO:0005829;GO:0042802;GO:0031996;GO:0005164;GO:0031625;GO:0008270;GO:0006915;GO:0097400;GO:0048255;GO:0008284;GO:0051091;GO:0043123;GO:0046330;GO:0051092;GO:0070534;GO:0042981;GO:0043122;GO:0007165;GO:0023019;GO:0033209 GO:0035631;GO:0005813;GO:0009898;GO:0005829 GO:0006915;GO:0097400;GO:0048255;GO:0008284;GO:0051091;GO:0043123;GO:0046330;GO:0051092;GO:0070534;GO:0042981;GO:0043122;GO:0007165;GO:0023019;GO:0033209 GO:0042802;GO:0031996;GO:0005164;GO:0031625;GO:0008270 Q00945 Neurophysin RING finger protein 84 GO:0005576;GO:0005185 GO:0005576 GO:0005185 Q5SWK7;Q8BXX5;Q9CXG1 74315 Rnf145 RING finger protein 145 RING finger protein 84 GO:0012505;GO:0005783;GO:0005789;GO:0016021;GO:0061630;GO:0008270 GO:0012505;GO:0005783;GO:0005789;GO:0016021 GO:0061630;GO:0008270 Q8ZXT3 Uncharacterized protein PAE1111 RING finger protein 84 Q5REG4 100171717 DTX3 Probable E3 ubiquitin-protein ligase DTX3 RING-type E3 ubiquitin transferase DTX3 GO:0005737;GO:0046872;GO:0016740;GO:0007219;GO:0016567 GO:0005737 GO:0007219;GO:0016567 GO:0046872;GO:0016740 
Q8QG60;Q8QG52;Q8QGQ5 374092 CRY2 Cryptochrome-2 RING-type E3 ubiquitin transferase DTX3 GO:0005737;GO:0005634;GO:0003677;GO:0071949;GO:0009881;GO:0032922;GO:0007623;GO:0043153;GO:0042754;GO:0045892;GO:0018298;GO:0042752;GO:0009416 GO:0005737;GO:0005634 GO:0032922;GO:0007623;GO:0043153;GO:0042754;GO:0045892;GO:0018298;GO:0042752;GO:0009416 GO:0003677;GO:0071949;GO:0009881 Q9H583;Q5T3Q8;Q6P197;Q9NW23 55127 HEATR1 HEAT repeat-containing protein 1 N-terminally processed U3 small nucleolar RNA-associated protein 10 homolog GO:0030686;GO:0001650;GO:0016020;GO:0005739;GO:0005730;GO:0005654;GO:0032040;GO:0034455;GO:0003723;GO:0030515;GO:0000462;GO:2000234;GO:0045943 GO:0030686;GO:0001650;GO:0016020;GO:0005739;GO:0005730;GO:0005654;GO:0032040;GO:0034455 GO:0000462;GO:2000234;GO:0045943 GO:0003723;GO:0030515 A0JMR6 779416 mysm1 Histone H2A deubiquitinase MYSM1 Myb-like SWIRM and MPN domain-containing protein 1 GO:0005634;GO:0003677;GO:0042393;GO:0070122;GO:0046872;GO:0140492;GO:0004843;GO:0003713;GO:0006338;GO:0035522;GO:0045944 GO:0005634 GO:0006338;GO:0035522;GO:0045944 GO:0003677;GO:0042393;GO:0070122;GO:0046872;GO:0140492;GO:0004843;GO:0003713 O88917;O09026;O35818;O88916 65096 Adgrl1 Adhesion G protein-coupled receptor L1 Latrophilin-1 GO:0030424;GO:0098978;GO:0030426;GO:0005887;GO:0099056;GO:0043005;GO:0005886;GO:0014069;GO:0042734;GO:0045202;GO:0030246;GO:0050839;GO:0004930;GO:0016524;GO:0015643;GO:0007189;GO:0007420;GO:0035584;GO:0007166;GO:0007157;GO:0051965;GO:0090129 GO:0030424;GO:0098978;GO:0030426;GO:0005887;GO:0099056;GO:0043005;GO:0005886;GO:0014069;GO:0042734;GO:0045202 GO:0007189;GO:0007420;GO:0035584;GO:0007166;GO:0007157;GO:0051965;GO:0090129 GO:0030246;GO:0050839;GO:0004930;GO:0016524;GO:0015643 Q7D513 egtB Hercynine oxygenase Gamma-glutamyl hercynylcysteine S-oxide synthase GO:0044875;GO:0005506;GO:0004497 GO:0044875;GO:0005506;GO:0004497 Q92968;B2RCS1 5194 PEX13 Peroxisomal membrane protein PEX13 Peroxin-13 
GO:0005829;GO:0005779;GO:0016020;GO:1990429;GO:0005778;GO:0005777;GO:0021795;GO:0001561;GO:0007626;GO:0060152;GO:0001764;GO:0016560;GO:0001967 GO:0005829;GO:0005779;GO:0016020;GO:1990429;GO:0005778;GO:0005777 GO:0021795;GO:0001561;GO:0007626;GO:0060152;GO:0001764;GO:0016560;GO:0001967 A6H769 505507 RPS7 40S ribosomal protein S7 Peroxin-13 GO:0022627;GO:0005815;GO:0032040;GO:0003735;GO:0042274;GO:0006364;GO:0006412 GO:0022627;GO:0005815;GO:0032040 GO:0042274;GO:0006364;GO:0006412 GO:0003735 Q14676;A2AB04;A2BF04;A2RRA8;A7YY86;B0S8A2;Q0EFC2;Q2L6H7;Q2TAZ4Q5JP55;Q5JP56;Q5ST83;Q68CQ3;Q86Z06;Q96QC2 9656 MDC1 Mediator of DNA damage checkpoint protein 1 Nuclear factor with BRCT domains 1 GO:0005694;GO:0005925;GO:0016604;GO:0005654;GO:0005634;GO:0070975;GO:0008022;GO:0006281;GO:0031573 GO:0005694;GO:0005925;GO:0016604;GO:0005654;GO:0005634 GO:0006281;GO:0031573 GO:0070975;GO:0008022 Q54W11 8622324 mcfL Mitochondrial substrate carrier family protein L Nuclear factor with BRCT domains 1 GO:0016021;GO:0005743;GO:0055085 GO:0016021;GO:0005743 GO:0055085 Q9FKK7;Q8L759 835871 XYLA Xylose isomerase Nuclear factor with BRCT domains 1 GO:0005783;GO:0005794;GO:0000325;GO:0009536;GO:0099503;GO:0046872;GO:0009045;GO:0042843 GO:0005783;GO:0005794;GO:0000325;GO:0009536;GO:0099503 GO:0042843 GO:0046872;GO:0009045 Q3ZCD7 614105 TECR Very-long-chain enoyl-CoA reductase Trans-23-enoyl-CoA reductase GO:0005783;GO:0030176;GO:0016491;GO:0102758;GO:0030497;GO:0006665;GO:0006694;GO:0042761 GO:0005783;GO:0030176 GO:0030497;GO:0006665;GO:0006694;GO:0042761 GO:0016491;GO:0102758 Q9D180;A2ACY9 68625 Cfap57 Cilia- and flagella-associated protein 57 WD repeat-containing protein 65 Q9D6Z0;Q8K1H3;Q9CY41;Q9D942 66400 Alkbh7 Alpha-ketoglutarate-dependent dioxygenase alkB homolog 7 mitochondrial Alkylated DNA repair protein alkB homolog 7 GO:0005759;GO:0005739;GO:0051213;GO:0046872;GO:0006974;GO:0006631;GO:0010883;GO:1902445 GO:0005759;GO:0005739 GO:0006974;GO:0006631;GO:0010883;GO:1902445 
GO:0051213;GO:0046872
%%html
<style>
/* Set the left margin of every table to 0; !important lets this rule win over other margin-left rules.
   NOTE(review): presumably so the rendered Markdown table below sits flush left - confirm. */
table {margin-left: 0 !important;}
</style>
Output format (tab-delimited):
gene_ID | SPIDs | UniProt_gene_ID | gene | gene_description | alternate_gene_description | all_GO_IDs | CC_GO_IDs | BP_GO_IDs | MF_GO_IDs |
---|---|---|---|---|---|---|---|---|---|
Explanation:
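awk -v FS='[;[:space:]]+'
: Sets the field separator to one or more semi-colons and/or whitespace characters, so `$1` of the first file is only the first accession, even when several accessions are joined with `;`.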
FNR == NR
: Restricts next block (designated by {}
) to work only on first input file.
{array[$1]=$0; next}
: Adds the entire line ($0
) of the first file to the array named `array`, keyed by the first field,
and then moves on to the next line, so the remaining commands only run on the second input file.
($1 in array)
: Looks for the value of the first column ($1
, which is SPID) from the second file to see if there's a match from the array (which contains the line from the first file).
{print $2"\t"array[$1]}'
: If there's a match, print the second column ($2
, which is gene ID) from the second file, followed by a tab and the line from the first file.
"${parsed_uniprot}" "${genome_IDs_SPIDs}"
: The first and second input files.
"${joined_output}"
: Result of the join.
%%bash
# Prefix each parsed UniProt line with the genome gene ID whose SPID matches
# the first accession of that line, and write the result to "${joined_output}".

# Stop if the analysis directory is missing; otherwise the redirect below
# would create the output file in the wrong directory.
cd "${analysis_dir}" || exit 1

# FS splits on runs of ";" and/or whitespace.
# First file  (NR==FNR): store each whole line in array, keyed by field 1.
# Second file: when field 1 is a stored key, print field 2, a tab, and the
#              stored line.
awk -v FS='[;[:space:]]+' '
  NR==FNR {array[$1]=$0; next}
  ($1 in array) {print $2"\t"array[$1]}
' "${parsed_uniprot}" "${genome_IDs_SPIDs}" \
  > "${joined_output}"
%%bash
cd "${analysis_dir}"

# How many joined lines were written
wc -l "${joined_output}"

# Blank line, rule, blank line
printf '\n%s\n\n' "------------------------------------------------------------------"

# First 25 joined lines, aligned into columns for reading
head -n 25 "${joined_output}" | column -t
14672 20230322-pgen-gene-accessions-gene_id-gene_name-gene_description-alt_gene_description-all_go_ids-C_go_ids-P_go_ids-F_go_ids.tab ------------------------------------------------------------------ PGEN_.00g000010 Q86IC9;Q552T5 8620183 omt5 Probable caffeoyl-CoA O-methyltransferase 1 O-methyltransferase 5 GO:0042409;GO:0046872;GO:0008757;GO:0032259 GO:0032259 GO:0042409;GO:0046872;GO:0008757 PGEN_.00g000020 P04177 25085 Th Tyrosine 3-monooxygenase Tyrosine 3-hydroxylase GO:0030424;GO:0005737;GO:0009898;GO:0031410;GO:0030659;GO:0005829;GO:0030425;GO:0033162;GO:0005739;GO:0043005;GO:0043025;GO:0005634;GO:0043204;GO:0048471;GO:0005790;GO:0008021;GO:0043195;GO:0016597;GO:0035240;GO:0019899;GO:0008199;GO:0008198;GO:0042802;GO:0004497;GO:0019825;GO:0019904;GO:0034617;GO:0004511;GO:0015842;GO:0009887;GO:0042423;GO:0071312;GO:0071333;GO:0071363;GO:0071287;GO:0071316;GO:0071466;GO:0021987;GO:0042745;GO:0050890;GO:0042416;GO:0006585;GO:0042755;GO:0048596;GO:0042418;GO:0042462;GO:0006631;GO:0016137;GO:0007507;GO:1990384;GO:0033076;GO:0007612;GO:0007626;GO:0007617;GO:0007613;GO:0010259;GO:0042136;GO:0042421;GO:0018963;GO:0052314;GO:0008016;GO:0014823;GO:0001975;GO:0051412;GO:0051602;GO:0032355;GO:0045471;GO:0045472;GO:0070848;GO:0009635;GO:0001666;GO:0035902;GO:0017085;GO:0035900;GO:0009416;GO:0032496;GO:0010038;GO:0035094;GO:0031667;GO:0014070;GO:0043434;GO:0046684;GO:0009651;GO:0048545;GO:0009414;GO:0009410;GO:0010043;GO:0007605;GO:0035176;GO:0006665;GO:0001963;GO:0042214;GO:0007601 GO:0030424;GO:0005737;GO:0009898;GO:0031410;GO:0030659;GO:0005829;GO:0030425;GO:0033162;GO:0005739;GO:0043005;GO:0043025;GO:0005634;GO:0043204;GO:0048471;GO:0005790;GO:0008021;GO:0043195 
GO:0015842;GO:0009887;GO:0042423;GO:0071312;GO:0071333;GO:0071363;GO:0071287;GO:0071316;GO:0071466;GO:0021987;GO:0042745;GO:0050890;GO:0042416;GO:0006585;GO:0042755;GO:0048596;GO:0042418;GO:0042462;GO:0006631;GO:0016137;GO:0007507;GO:1990384;GO:0033076;GO:0007612;GO:0007626;GO:0007617;GO:0007613;GO:0010259;GO:0042136;GO:0042421;GO:0018963;GO:0052314;GO:0008016;GO:0014823;GO:0001975;GO:0051412;GO:0051602;GO:0032355;GO:0045471;GO:0045472;GO:0070848;GO:0009635;GO:0001666;GO:0035902;GO:0017085;GO:0035900;GO:0009416;GO:0032496;GO:0010038;GO:0035094;GO:0031667;GO:0014070;GO:0043434;GO:0046684;GO:0009651;GO:0048545;GO:0009414;GO:0009410;GO:0010043;GO:0007605;GO:0035176;GO:0006665;GO:0001963;GO:0042214;GO:0007601 GO:0016597;GO:0035240;GO:0019899;GO:0008199;GO:0008198;GO:0042802;GO:0004497;GO:0019825;GO:0019904;GO:0034617;GO:0004511 PGEN_.00g000050 Q8L840;O04092;Q9FT71 837636 RECQL4A ATP-dependent DNA helicase Q-like 4A SGS1-like protein GO:0005694;GO:0005737;GO:0005634;GO:0009506;GO:0043138;GO:0005524;GO:0016887;GO:0009378;GO:0046872;GO:0003676;GO:0071215;GO:0070417;GO:0006974;GO:0051276;GO:0032508;GO:0006310;GO:0006281;GO:0006268;GO:0000724 GO:0005694;GO:0005737;GO:0005634;GO:0009506 GO:0071215;GO:0070417;GO:0006974;GO:0051276;GO:0032508;GO:0006310;GO:0006281;GO:0006268;GO:0000724 GO:0043138;GO:0005524;GO:0016887;GO:0009378;GO:0046872;GO:0003676 PGEN_.00g000060 Q61043;A0A1Y7VJL5;B2RQ73;B7ZMZ9;E9Q488;E9Q4S3;Q674R4;Q6ZPM7 18080 Nin Ninein SGS1-like protein GO:0045177;GO:0030424;GO:0044295;GO:0120103;GO:0005814;GO:0005813;GO:0097539;GO:0005881;GO:0030425;GO:0072686;GO:0097431;GO:0005730;GO:0005654;GO:0000242;GO:0005886;GO:0000922;GO:0005509;GO:0005525;GO:0019900;GO:0051011;GO:0010457;GO:0051642;GO:0090222;GO:0048668;GO:0021540;GO:0021957;GO:0034454;GO:0050772;GO:0031116;GO:0008104 GO:0045177;GO:0030424;GO:0044295;GO:0120103;GO:0005814;GO:0005813;GO:0097539;GO:0005881;GO:0030425;GO:0072686;GO:0097431;GO:0005730;GO:0005654;GO:0000242;GO:0005886;GO:0000922 
GO:0010457;GO:0051642;GO:0090222;GO:0048668;GO:0021540;GO:0021957;GO:0034454;GO:0050772;GO:0031116;GO:0008104 GO:0005509;GO:0005525;GO:0019900;GO:0051011 PGEN_.00g000080 A1E2V0 489433 BIRC3 Baculoviral IAP repeat-containing protein 3 RING-type E3 ubiquitin transferase BIRC3 GO:0005737;GO:0005829;GO:0005654;GO:0005634;GO:0043027;GO:0046872;GO:0061630;GO:0043066;GO:0060546;GO:0031398;GO:0051726 GO:0005737;GO:0005829;GO:0005654;GO:0005634 GO:0043066;GO:0060546;GO:0031398;GO:0051726 GO:0043027;GO:0046872;GO:0061630 PGEN_.00g000090 P34456 186266 Uncharacterized protein F54H12.2 RING-type E3 ubiquitin transferase BIRC3 GO:0005829;GO:0004748;GO:0009263 GO:0005829 GO:0009263 GO:0004748 PGEN_.00g000120 P34457 Putative uncharacterized transposon-derived protein F54H12.3 RING-type E3 ubiquitin transferase BIRC3 GO:0003676;GO:0015074 GO:0015074 GO:0003676 PGEN_.00g000210 O00463;B4DIS9;B4E0A2;Q6FHY1 7188 TRAF5 TNF receptor-associated factor 5 RING finger protein 84 GO:0035631;GO:0005813;GO:0009898;GO:0005829;GO:0042802;GO:0031996;GO:0005164;GO:0031625;GO:0008270;GO:0006915;GO:0097400;GO:0048255;GO:0008284;GO:0051091;GO:0043123;GO:0046330;GO:0051092;GO:0070534;GO:0042981;GO:0043122;GO:0007165;GO:0023019;GO:0033209 GO:0035631;GO:0005813;GO:0009898;GO:0005829 GO:0006915;GO:0097400;GO:0048255;GO:0008284;GO:0051091;GO:0043123;GO:0046330;GO:0051092;GO:0070534;GO:0042981;GO:0043122;GO:0007165;GO:0023019;GO:0033209 GO:0042802;GO:0031996;GO:0005164;GO:0031625;GO:0008270 PGEN_.00g000230 Q00945 Neurophysin RING finger protein 84 GO:0005576;GO:0005185 GO:0005576 GO:0005185 PGEN_.00g000240 Q5SWK7;Q8BXX5;Q9CXG1 74315 Rnf145 RING finger protein 145 RING finger protein 84 GO:0012505;GO:0005783;GO:0005789;GO:0016021;GO:0061630;GO:0008270 GO:0012505;GO:0005783;GO:0005789;GO:0016021 GO:0061630;GO:0008270 PGEN_.00g000280 Q8ZXT3 Uncharacterized protein PAE1111 RING finger protein 84 PGEN_.00g000300 Q5REG4 100171717 DTX3 Probable E3 ubiquitin-protein ligase DTX3 RING-type E3 ubiquitin transferase 
DTX3 GO:0005737;GO:0046872;GO:0016740;GO:0007219;GO:0016567 GO:0005737 GO:0007219;GO:0016567 GO:0046872;GO:0016740 PGEN_.00g000380 Q8QG60;Q8QG52;Q8QGQ5 374092 CRY2 Cryptochrome-2 RING-type E3 ubiquitin transferase DTX3 GO:0005737;GO:0005634;GO:0003677;GO:0071949;GO:0009881;GO:0032922;GO:0007623;GO:0043153;GO:0042754;GO:0045892;GO:0018298;GO:0042752;GO:0009416 GO:0005737;GO:0005634 GO:0032922;GO:0007623;GO:0043153;GO:0042754;GO:0045892;GO:0018298;GO:0042752;GO:0009416 GO:0003677;GO:0071949;GO:0009881 PGEN_.00g000440 Q9H583;Q5T3Q8;Q6P197;Q9NW23 55127 HEATR1 HEAT repeat-containing protein 1 N-terminally processed U3 small nucleolar RNA-associated protein 10 homolog GO:0030686;GO:0001650;GO:0016020;GO:0005739;GO:0005730;GO:0005654;GO:0032040;GO:0034455;GO:0003723;GO:0030515;GO:0000462;GO:2000234;GO:0045943 GO:0030686;GO:0001650;GO:0016020;GO:0005739;GO:0005730;GO:0005654;GO:0032040;GO:0034455 GO:0000462;GO:2000234;GO:0045943 GO:0003723;GO:0030515 PGEN_.00g000450 A0JMR6 779416 mysm1 Histone H2A deubiquitinase MYSM1 Myb-like SWIRM and MPN domain-containing protein 1 GO:0005634;GO:0003677;GO:0042393;GO:0070122;GO:0046872;GO:0140492;GO:0004843;GO:0003713;GO:0006338;GO:0035522;GO:0045944 GO:0005634 GO:0006338;GO:0035522;GO:0045944 GO:0003677;GO:0042393;GO:0070122;GO:0046872;GO:0140492;GO:0004843;GO:0003713 PGEN_.00g000460 O88917;O09026;O35818;O88916 65096 Adgrl1 Adhesion G protein-coupled receptor L1 Latrophilin-1 GO:0030424;GO:0098978;GO:0030426;GO:0005887;GO:0099056;GO:0043005;GO:0005886;GO:0014069;GO:0042734;GO:0045202;GO:0030246;GO:0050839;GO:0004930;GO:0016524;GO:0015643;GO:0007189;GO:0007420;GO:0035584;GO:0007166;GO:0007157;GO:0051965;GO:0090129 GO:0030424;GO:0098978;GO:0030426;GO:0005887;GO:0099056;GO:0043005;GO:0005886;GO:0014069;GO:0042734;GO:0045202 GO:0007189;GO:0007420;GO:0035584;GO:0007166;GO:0007157;GO:0051965;GO:0090129 GO:0030246;GO:0050839;GO:0004930;GO:0016524;GO:0015643 PGEN_.00g000490 Q7D513 egtB Hercynine oxygenase Gamma-glutamyl hercynylcysteine 
S-oxide synthase GO:0044875;GO:0005506;GO:0004497 GO:0044875;GO:0005506;GO:0004497 PGEN_.00g000520 Q92968;B2RCS1 5194 PEX13 Peroxisomal membrane protein PEX13 Peroxin-13 GO:0005829;GO:0005779;GO:0016020;GO:1990429;GO:0005778;GO:0005777;GO:0021795;GO:0001561;GO:0007626;GO:0060152;GO:0001764;GO:0016560;GO:0001967 GO:0005829;GO:0005779;GO:0016020;GO:1990429;GO:0005778;GO:0005777 GO:0021795;GO:0001561;GO:0007626;GO:0060152;GO:0001764;GO:0016560;GO:0001967 PGEN_.00g000530 A6H769 505507 RPS7 40S ribosomal protein S7 Peroxin-13 GO:0022627;GO:0005815;GO:0032040;GO:0003735;GO:0042274;GO:0006364;GO:0006412 GO:0022627;GO:0005815;GO:0032040 GO:0042274;GO:0006364;GO:0006412 GO:0003735 PGEN_.00g000540 Q14676;A2AB04;A2BF04;A2RRA8;A7YY86;B0S8A2;Q0EFC2;Q2L6H7;Q2TAZ4Q5JP55;Q5JP56;Q5ST83;Q68CQ3;Q86Z06;Q96QC2 9656 MDC1 Mediator of DNA damage checkpoint protein 1 Nuclear factor with BRCT domains 1 GO:0005694;GO:0005925;GO:0016604;GO:0005654;GO:0005634;GO:0070975;GO:0008022;GO:0006281;GO:0031573 GO:0005694;GO:0005925;GO:0016604;GO:0005654;GO:0005634 GO:0006281;GO:0031573 GO:0070975;GO:0008022 PGEN_.00g000560 Q54W11 8622324 mcfL Mitochondrial substrate carrier family protein L Nuclear factor with BRCT domains 1 GO:0016021;GO:0005743;GO:0055085 GO:0016021;GO:0005743 GO:0055085 PGEN_.00g000600 Q9FKK7;Q8L759 835871 XYLA Xylose isomerase Nuclear factor with BRCT domains 1 GO:0005783;GO:0005794;GO:0000325;GO:0009536;GO:0099503;GO:0046872;GO:0009045;GO:0042843 GO:0005783;GO:0005794;GO:0000325;GO:0009536;GO:0099503 GO:0042843 GO:0046872;GO:0009045 PGEN_.00g000660 Q3ZCD7 614105 TECR Very-long-chain enoyl-CoA reductase Trans-23-enoyl-CoA reductase GO:0005783;GO:0030176;GO:0016491;GO:0102758;GO:0030497;GO:0006665;GO:0006694;GO:0042761 GO:0005783;GO:0030176 GO:0030497;GO:0006665;GO:0006694;GO:0042761 GO:0016491;GO:0102758 PGEN_.00g000670 Q9D180;A2ACY9 68625 Cfap57 Cilia- and flagella-associated protein 57 WD repeat-containing protein 65 PGEN_.00g000680 Q9D6Z0;Q8K1H3;Q9CY41;Q9D942 66400 Alkbh7 
Alpha-ketoglutarate-dependent dioxygenase alkB homolog 7 mitochondrial Alkylated DNA repair protein alkB homolog 7 GO:0005759;GO:0005739;GO:0051213;GO:0046872;GO:0006974;GO:0006631;GO:0010883;GO:1902445 GO:0005759;GO:0005739 GO:0006974;GO:0006631;GO:0010883;GO:1902445 GO:0051213;GO:0046872