BLAST

1: Download the PDBAA BLAST database from NCBI. This is a small premade database. It is ready to use with BLAST and does not require any formatting or further work.

2: Download UniRef50 and create a BLAST database from it. Creating a BLAST database from the downloaded sequences may take 30 minutes or more.

#!/usr/bin/env bash

## BLAST db download/create 
## 
## James Vincent
## help@sbgrid.org
## June 13, 2025

## Start SBGrid environment
source /programs/sbgrid.shrc

## Always set version of blast in use
export BLASTPLUS_X=2.16.0

## Make a directory for databases (inside the current directory) and enter it.
## - 'mkdir -p' succeeds when the directory already exists, so the script
##   can be rerun.
## - The path is quoted so a working directory containing spaces still works.
## - Stop if the directory cannot be entered; otherwise the downloads below
##   would be written to the wrong place.
db_dir="${PWD}/blast_databases"
mkdir -p -- "${db_dir}"
cd -- "${db_dir}" || {
  printf 'error: cannot change into %s\n' "${db_dir}" >&2
  exit 1
}

## Download preformatted PDBAA database as small control
## This uncompresses as a ready to use blast database named 'pdbaa'
##
## curl options: -L follows redirects, -O saves under the remote file name,
## --fail makes curl exit non-zero on an HTTP error instead of saving the
## server's error page as if it were the archive.
## NOTE(review): -k disables TLS certificate verification. It is kept from
## the original command; remove it if the host trusts the server certificates.
##
## Every step stops the script on failure so later steps never run on a
## missing or incomplete file.
curl --fail -kLO https://ftp.ncbi.nlm.nih.gov/blast/db/pdbaa.tar.gz || {
  printf 'error: download of pdbaa.tar.gz failed\n' >&2
  exit 1
}
tar zxf pdbaa.tar.gz || {
  printf 'error: could not unpack pdbaa.tar.gz\n' >&2
  exit 1
}

## Download uniref50 fasta file
curl --fail -kLO https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz || {
  printf 'error: download of uniref50.fasta.gz failed\n' >&2
  exit 1
}

## Uncompress (replaces uniref50.fasta.gz with uniref50.fasta)
gunzip uniref50.fasta.gz || {
  printf 'error: could not uncompress uniref50.fasta.gz\n' >&2
  exit 1
}

## Create blastdb from fasta file  - this will take a long time
## The resulting protein database is named 'uniref50' (-out).
makeblastdb -in uniref50.fasta -dbtype prot -out uniref50 -parse_seqids || {
  printf 'error: makeblastdb failed for uniref50.fasta\n' >&2
  exit 1
}

## Step up one directory, then publish the location of the database
## folder to child processes through BLASTDB.
cd ..
BLASTDB="$(pwd)/blast_databases"
export BLASTDB

## Create a sample query fasta file
## printf '%s\n' writes each argument on its own line. Unlike 'echo -e' it
## behaves the same in every POSIX shell, so the file is correct even when
## the script is run with a plain 'sh'.
printf '%s\n' \
   '>sp|P69905|HBA_HUMAN Hemoglobin subunit alpha OS=Homo sapiens OX=9606 GN=HBA1 PE=1 SV=2' \
   'VLSPADKTN' \
   'VKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF' \
   > query.fasta

## Create a fasta sequence from an existing PDB for searching 'pdbaa' as a positive control
## (the header line keeps the trailing space of the original output)
printf '%s\n' \
   '>1CIF_1|Chain A|CYTOCHROME C|Saccharomyces cerevisiae (4932) ' \
   'TEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGP' \
   'NLHGIFGAHSGQAEGYSYTDANIKKNVLWDENNMSEYLTNPKKYIPGTKMASGGLKKEKDRNDLITYLKKAAE' \
   > 1cif.fasta
 
## Search PDB AA as a control
## Reads 1cif.fasta, searches the 'pdbaa' database under BLASTDB and writes
## the hits to pdbaa_1cif_results.txt. The database path is quoted so a
## BLASTDB location containing spaces is passed as a single argument.
blastp \
   -query 1cif.fasta \
   -db "${BLASTDB}/pdbaa" \
   -evalue 0.001 \
   -outfmt 7 \
   -out pdbaa_1cif_results.txt

## Search uniref50 - will require large memory
## The database is addressed by the base name given to 'makeblastdb -out'
## ('uniref50'). A suffixed name such as 'uniref50.01' would select a single
## volume of a multi-volume database rather than the whole database, and
## would not exist at all if the database was written as one volume.
psiblast \
   -query query.fasta \
   -db "${BLASTDB}/uniref50" \
   -evalue 0.001 \
   -num_iterations 3 \
   -outfmt 7 \
   -out psiblast_results.txt