"""Download a SAbDab search export and build a ProteinFlow dataset on Colab.

Steps, in order: install ``proteinflow`` and ``mmseqs2``, mount Google Drive,
download the SAbDab summary table and structure archive into the Drive data
folder, run ``proteinflow generate --sabdab`` on the archive, and list the
generated dataset folder.
"""

# Import necessary libraries
import os
import subprocess
import sys
from pathlib import Path


def run_command(args, *, quiet=False):
    """Run an external command and raise if it fails.

    The command's combined stdout/stderr is read line by line and printed, so
    it shows up in the notebook cell output (a child process writing straight
    to the inherited file descriptors would not).

    Args:
        args: Program followed by its arguments; every item is passed through
            ``str`` so ``Path`` objects can be given directly.
        quiet: When true, output is held back and only printed (to stderr) if
            the command fails.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    command = [str(arg) for arg in args]
    held_back = []
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",
    ) as process:
        for line in process.stdout:
            if quiet:
                held_back.append(line)
            else:
                print(line, end="")
    if process.returncode != 0:
        # Surface what a quiet command said so the failure can be diagnosed.
        sys.stderr.write("".join(held_back))
        raise subprocess.CalledProcessError(process.returncode, command)


# Install the tools first: the third-party imports below must happen after
# this step so they pick up whatever versions the installation leaves behind.
run_command([sys.executable, "-m", "pip", "install", "proteinflow"], quiet=True)
run_command(["apt-get", "install", "-qq", "-y", "mmseqs2"], quiet=True)

from google.colab import drive

drive.mount('/content/gdrive', force_remount=True)

path = Path("/content/gdrive/")
path_data = path / "MyDrive" / "data"
# wget cannot write into a folder that does not exist yet.
path_data.mkdir(parents=True, exist_ok=True)

import pandas as pd
from slugify import slugify

# run_command(["proteinflow", "generate", "--help"])

# Species Homo Sapiens and Resolution 2.5 A
# NOTE(review): the two URLs below appear to point at one specific SAbDab
# search result; they need to be generated fresh every time.
sabdab_summary_url = 'https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/summary/20240520_0899946/'
sabdab_url = 'https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/archive/20240520_0899946/'

# The last path component of the URL names the files and tags the dataset;
# stripping the trailing slash first makes this work with or without it.
fname = slugify(sabdab_summary_url.rstrip('/').rsplit('/', 1)[-1], lowercase=False)

summary_file = path_data / f"{fname}_summary.tsv"
archive_file = path_data / f"{fname}.zip"

# Download from the variables defined above so the URLs live in one place.
run_command(["wget", sabdab_summary_url, "-O", summary_file])
run_command(["wget", sabdab_url, "-O", archive_file])

run_command(["ls", path_data])

run_command([
    "proteinflow", "generate", "--sabdab",
    "--sabdab_data_path", archive_file,
    "--tag", fname,
    "--resolution_thr", "2.5",
    "--not_remove_redundancies",
    "--min_seq_id", "0.9",
    "--local_datasets_folder", path_data,
    "--valid_split", "0.1",
    "--test_split", "0.1",
    "--split_tolerance", "0.05",
])

# The generated dataset is expected in a "proteinflow_<tag>" folder inside the
# local datasets folder.
run_command(["ls", path_data / f"proteinflow_{fname}"])

run_command(["proteinflow", "generate", "--help"])