3df97d88bc7257a8f54b43ef66a139b786f0f82d
examples/alphafold3.md
... | ... | @@ -1 +1,75 @@ |
1 | -# This is alphafold3 example |
|
... | ... | \ No newline at end of file |
0 | +## AlphaFold3 Example |
|
1 | + |
|
2 | +### Download databases |
|
3 | + |
|
4 | +The databases for AlphaFold3 (AF3) must be downloaded. We provide the script we used below. Adjust the DATABASE_DIR as needed. |
|
5 | + |
|
6 | + |
|
7 | +``` |
|
8 | +#!/usr/bin/env bash |
|
9 | + |
|
10 | +## Alphafold 3.0.0 database download |
|
11 | +## Pulled from provided fetch_databases.py |
|
12 | +## |
|
13 | +## JV Nov 21, 2024 |
|
14 | +## help@sbgrid.org |
|
15 | + |
|
16 | +# get zstd in path |
|
17 | +export PATH=$PATH:$HOME/.local/bin |
|
18 | + |
|
19 | +files=( |
|
20 | + 'bfd-first_non_consensus_sequences.fasta.zst' |
|
21 | + 'mgy_clusters_2022_05.fa.zst' |
|
22 | + 'nt_rna_2023_02_23_clust_seq_id_90_cov_80_rep_seq.fasta.zst' |
|
23 | + 'pdb_2022_09_28_mmcif_files.tar.zst' |
|
24 | + 'pdb_seqres_2022_09_28.fasta.zst' |
|
25 | + 'rfam_14_9_clust_seq_id_90_cov_80_rep_seq.fasta.zst' |
|
26 | + 'rnacentral_active_seq_id_90_cov_80_linclust.fasta.zst' |
|
27 | + 'uniprot_all_2021_04.fa.zst' |
|
28 | + 'uniref90_2022_05.fa.zst' |
|
29 | +) |
|
30 | + |
|
31 | +DATABASE_DIR="/programs/local/alphafold-3.0.0/databases" |
|
32 | +GOOGLE_URL="https://storage.googleapis.com/alphafold-databases/v3.0" |
|
33 | + |
|
34 | +for filename in "${files[@]}"; do |
|
35 | + echo "Fetching: $filename" |
|
36 | + echo "curl --progress-bar --continue-at - --output ${DATABASE_DIR}/${filename} ${GOOGLE_URL}/${filename} " |
|
37 | +done |
|
38 | + |
|
39 | +for filename in "${files[@]}"; do |
|
40 | + echo "Uncompressing: $filename" |
|
41 | + echo "zstd --decompress --force ${DATABASE_DIR}/${filename}" |
|
42 | +done |
|
43 | + |
|
44 | +``` |
|
45 | + |
|
46 | + |
|
47 | + |
|
48 | +The resulting layout: |
|
49 | + |
|
50 | +``` |
|
51 | +/programs/local/alphafold-3.0.0/databases/ |
|
52 | +├── bfd-first_non_consensus_sequences.fasta |
|
53 | +├── compressed |
|
54 | +│ ├── bfd-first_non_consensus_sequences.fasta.zst |
|
55 | +│ ├── mgy_clusters_2022_05.fa.zst |
|
56 | +│ ├── nt_rna_2023_02_23_clust_seq_id_90_cov_80_rep_seq.fasta.zst |
|
57 | +│ ├── pdb_2022_09_28_mmcif_files.tar.zst |
|
58 | +│ ├── pdb_seqres_2022_09_28.fasta.zst |
|
59 | +│ ├── rfam_14_9_clust_seq_id_90_cov_80_rep_seq.fasta.zst |
|
60 | +│ ├── rnacentral_active_seq_id_90_cov_80_linclust.fasta.zst |
|
61 | +│ ├── uniprot_all_2021_04.fa.zst |
|
62 | +│ └── uniref90_2022_05.fa.zst |
|
63 | +├── mgy_clusters_2022_05.fa |
|
64 | +├── nt_rna_2023_02_23_clust_seq_id_90_cov_80_rep_seq.fasta |
|
65 | +├── pdb_2022_09_28_mmcif_files.tar |
|
66 | +├── pdb_seqres_2022_09_28.fasta |
|
67 | +├── rfam_14_9_clust_seq_id_90_cov_80_rep_seq.fasta |
|
68 | +├── rnacentral_active_seq_id_90_cov_80_linclust.fasta |
|
69 | +├── uniprot_all_2021_04.fa |
|
70 | +└── uniref90_2022_05.fa |
|
71 | +``` |
|
72 | + |
|
73 | +### |
|
74 | +``` |