b41b7b22886b91948dcf3fe7a2e2c6039e227aed
examples.md
... | ... | @@ -7,6 +7,7 @@ The following pages provide usage info and examples for select applications in t |
7 | 7 | - [ALPHAFOLD3](examples/alphafold3) |
8 | 8 | - [AlphaPulldown](examples/alphapulldown_example) |
9 | 9 | - [Arp/Warp registration](examples/arpwarp_registration) |
10 | +- [BLAST](examples/BLAST) |
|
10 | 11 | - [BOLTZ 2.1.1](examples/BOLTZ) |
11 | 12 | - [CARELESS](examples/careless) |
12 | 13 | - [DiffDock](examples/diffdock) |
examples/BLAST.md
... | ... | @@ -0,0 +1,70 @@ |
1 | + |
|
2 | +1: Download the PDBAA BLAST database from NCBI. This small, premade database is ready to use with BLAST and requires no formatting or further work. |
|
3 | + |
|
4 | +2: Download UniRef50 and create a BLAST database from it. Creating a BLAST database from the downloaded sequences may take 30 minutes or more. |
|
5 | + |
|
6 | +``` |
|
7 | +#!/usr/bin/env bash |
|
8 | + |
|
9 | +## BLAST db download/create |
|
10 | +## |
|
11 | +## James Vincent |
|
12 | +## help@sbgrid.org |
|
13 | +## June 13, 2025 |
|
14 | + |
|
15 | +## Start SBGrid environment |
|
16 | +source /programs/sbgrid.shrc |
|
17 | + |
|
18 | +## Always set the version of BLAST+ in use |
|
19 | +export BLASTPLUS_X=2.16.0 |
|
20 | + |
|
21 | +## Make a directory for databases: |
|
22 | +mkdir $(pwd)/blast_databases |
|
23 | +cd $(pwd)/blast_databases |
|
24 | + |
|
25 | +## Download the preformatted PDBAA database as a small control |

26 | +## This extracts into a ready-to-use BLAST database named 'pdbaa' |
|
27 | +curl -kLO https://ftp.ncbi.nlm.nih.gov/blast/db/pdbaa.tar.gz |
|
28 | +tar zxf pdbaa.tar.gz |
|
29 | + |
|
30 | +## Download uniref50 fasta file |
|
31 | +curl -kLO https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz |
|
32 | + |
|
33 | +## Uncompress |
|
34 | +gunzip uniref50.fasta.gz |
|
35 | + |
|
36 | +## Create blastdb from fasta file - this will take a long time |
|
37 | +makeblastdb -in uniref50.fasta -dbtype prot -out uniref50 -parse_seqids |
|
38 | + |
|
39 | +## Set env var to location of blast databases |
|
40 | +cd .. |
|
41 | +export BLASTDB=$(pwd)/blast_databases |
|
42 | + |
|
43 | +## Create a sample query fasta file |
|
44 | +echo -e ">sp|P69905|HBA_HUMAN Hemoglobin subunit alpha OS=Homo sapiens OX=9606 GN=HBA1 PE=1 SV=2\nVLSPADKTN |
|
45 | +VKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF" > query.fasta |
|
46 | + |
|
47 | +## Create a FASTA file from an existing PDB entry (1CIF) to search 'pdbaa' as a positive control |
|
48 | +echo -e ">1CIF_1|Chain A|CYTOCHROME C|Saccharomyces cerevisiae (4932) \nTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGP |
|
49 | +NLHGIFGAHSGQAEGYSYTDANIKKNVLWDENNMSEYLTNPKKYIPGTKMASGGLKKEKDRNDLITYLKKAAE" > 1cif.fasta |
|
50 | + |
|
51 | +## Search PDB AA as a control |
|
52 | +blastp \ |
|
53 | + -query 1cif.fasta \ |
|
54 | + -db ${BLASTDB}/pdbaa \ |
|
55 | + -evalue 0.001 \ |
|
56 | + -outfmt 7 \ |
|
57 | + -out pdbaa_1cif_results.txt |
|
58 | + |
|
59 | +## Search uniref50 - this requires a large amount of memory |
|
60 | +psiblast \ |
|
61 | + -query query.fasta \ |
|
62 | + -db ${BLASTDB}/uniref50.01 \ |
|
63 | + -evalue 0.001 \ |
|
64 | + -num_iterations 3 \ |
|
65 | + -outfmt 7 \ |
|
66 | + -out psiblast_results.txt |
|
67 | + |
|
68 | + |
|
69 | + |
|
70 | +``` |
|
... | ... | \ No newline at end of file |