cace77bb8266d237d976c2afcb8fcbe86f066f6c
examples/run_alphafold_template.sh
... | ... | @@ -1,57 +1,103 @@ |
1 | 1 | #!/bin/bash |
2 | -# Jason Key 1fac4ac 2021-07-23 08:33:47 -0400 |
|
2 | +# Jason Key 9e4d1a4 2021-07-29 11:04:26 -0400 |
|
3 | 3 | # Copyright © 2021 SBGrid Consortium. All rights reserved. |
4 | 4 | # |
5 | -# wrapper script for alphafold |
|
5 | +# Wrapper script for alphafold |
|
6 | +# |
|
7 | +# |
|
8 | +# To set single GPU |
|
9 | +# export CUDA_VISIBLE_DEVICES=0 |
|
10 | +# |
|
11 | +# Useful variables used by this script : |
|
12 | +# |
|
13 | +# ALPHAFOLD_DB : path to the database directory; used only if it is an existing directory (default: /programs/local/alphafold/) |
|
14 | +# ALPHAFOLD_PTM : use the pTM models when set to any non-empty value |
|
15 | +# ALPHAFOLD_PRESET : database preset, 'reduced_dbs' or 'casp14' (default: full_dbs) |
|
16 | +# ALPHAFOLD_TEMPLATE : date string for limiting template search (default: 2020-05-14) |
|
6 | 17 | |
7 | -USAGE="$(basename $0): A wrapper script for running Alphafold jobs in the SBGrid Software installation \n\n |
|
8 | -Usage: $(basename $0) [fasta file] [path to output directory] \n\n" |
|
18 | +USAGE="$(basename $0): A wrapper script for running Alphafold jobs in the SBGrid Software installation |
|
19 | +Usage: $(basename $0) [fasta file] [path to output directory]" |
|
9 | 20 | |
10 | 21 | if [ $# -eq 0 ]; then |
11 | - echo -e $USAGE |
|
12 | - exit 1 |
|
22 | + echo $USAGE |
|
23 | + exit 1 |
|
13 | 24 | fi |
14 | 25 | |
15 | -## To set single GPU |
|
16 | -# export CUDA_VISIBLE_DEVICES=0 |
|
26 | +input_fasta=$1 |
|
27 | +if [ ! -f ${input_fasta} ]; then |
|
28 | + echo "${input_fasta} is not a file. Exiting... " |
|
29 | + exit 1 |
|
30 | +fi |
|
31 | + |
|
32 | +# Output directory |
|
33 | +output_dir="/tmp/alphafold" |
|
34 | +if [ $2 ]; then |
|
35 | + output_dir=$2 |
|
36 | +fi |
|
37 | +mkdir -p "${output_dir}" |
|
17 | 38 | |
39 | +## ALPHAFOLD databases |
|
18 | 40 | data_dir="/programs/local/alphafold/" |
19 | -if [ ! -z ${ALPHAFOLD_DB} ] && [ -d ${ALPHAFOLD_DB} ] ; then |
|
20 | - data_dir=${ALPHAFOLD_DB} |
|
41 | +if [ ! -z ${ALPHAFOLD_DB} ] && [ -d ${ALPHAFOLD_DB} ]; then |
|
42 | + data_dir=${ALPHAFOLD_DB} |
|
21 | 43 | fi |
22 | 44 | |
23 | -if [ ! -d ${data_dir} ] ; then |
|
45 | +if [ ! -d ${data_dir} ]; then |
|
24 | 46 | echo "${data_dir} is not a directory. Exiting... " |
25 | 47 | exit 1 |
48 | +else |
|
49 | + echo "Using databases in ${data_dir}" |
|
26 | 50 | fi |
27 | 51 | |
28 | -echo "Using databases in ${data_dir}" |
|
29 | - |
|
30 | -input_fasta=$1 |
|
52 | +# Set pTM models |
|
53 | +if [ ! -z ${ALPHAFOLD_PTM} ]; then |
|
54 | + model_names="model_1_ptm,model_2_ptm,model_3_ptm,model_4_ptm,model_5_ptm" |
|
55 | + echo -e "Using pTM models \n" |
|
56 | +else |
|
57 | + model_names="model_1,model_2,model_3,model_4,model_5" |
|
58 | +fi |
|
31 | 59 | |
32 | -if [ ! -f ${input_fasta} ] ; then |
|
33 | - echo "${input_fasta} is not a file. Exiting... " |
|
34 | - exit 1 |
|
60 | +# Filter by date |
|
61 | +if [ ! -z ${ALPHAFOLD_TEMPLATE} ]; then |
|
62 | + template_date=${ALPHAFOLD_TEMPLATE} |
|
63 | + echo -e "Using max-template-date $template_date \n" |
|
64 | +else |
|
65 | + template_date="2020-05-14" |
|
35 | 66 | fi |
36 | 67 | |
37 | -output_dir="/tmp/alphafold" |
|
38 | -if [ $2 ] ; then |
|
39 | - output_dir=$2 |
|
68 | +# database presets |
|
69 | +if [ ! -z ${ALPHAFOLD_PRESET} ] && [ ${ALPHAFOLD_PRESET} == 'reduced_dbs' ]; then |
|
70 | + preset_args="--preset=reduced_dbs --small_bfd_database_path=${data_dir}/small_bfd" |
|
71 | + echo -e "Using --preset=reduced_dbs \n" |
|
72 | +elif [ ! -z ${ALPHAFOLD_PRESET} ] && [ ${ALPHAFOLD_PRESET} == 'casp14' ]; then |
|
73 | + preset_args="-preset=casp14 \ |
|
74 | +--bfd_database_path=${data_dir}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \ |
|
75 | +--uniclust30_database_path=${data_dir}/uniclust30/uniclust30_2018_08/uniclust30_2018_08 " |
|
76 | + echo -e "Using --preset=casp14 \n" |
|
77 | +# default is full_dbs |
|
78 | +else |
|
79 | + preset_args="-preset=full_dbs \ |
|
80 | +--bfd_database_path=${data_dir}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \ |
|
81 | +--uniclust30_database_path=${data_dir}/uniclust30/uniclust30_2018_08/uniclust30_2018_08 " |
|
40 | 82 | fi |
41 | 83 | |
42 | -mkdir -p "${output_dir}" |
|
84 | +## |
|
85 | +base_args="--data_dir=${data_dir} --output_dir=${output_dir} --fasta_paths=${input_fasta} \ |
|
86 | +--model_names=${model_names} --max_template_date=${template_date} " |
|
87 | + |
|
88 | +database_args="--uniref90_database_path=${data_dir}/uniref90/uniref90.fasta \ |
|
89 | +--mgnify_database_path=${data_dir}/mgnify/mgy_clusters.fa \ |
|
90 | +--pdb70_database_path=${data_dir}/pdb70/pdb70 \ |
|
91 | +--template_mmcif_dir=${data_dir}/pdb_mmcif/mmcif_files \ |
|
92 | +--obsolete_pdbs_path=$data_dir/pdb_mmcif/obsolete.dat " |
|
43 | 93 | |
44 | -/programs/x86_64-linux/alphafold/2.0.0/bin.capsules/run_alphafold.py \ |
|
45 | ---model_names="model_1,model_2,model_3,model_4,model_5" \ |
|
46 | ---data_dir="${data_dir}" \ |
|
47 | ---uniref90_database_path="${data_dir}/uniref90/uniref90.fasta" \ |
|
48 | ---mgnify_database_path="${data_dir}/mgnify/mgy_clusters.fa" \ |
|
49 | ---uniclust30_database_path="${data_dir}/uniclust30/uniclust30_2018_08/uniclust30_2018_08" \ |
|
50 | ---bfd_database_path="${data_dir}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" \ |
|
51 | ---pdb70_database_path="${data_dir}/pdb70/pdb70" \ |
|
52 | ---template_mmcif_dir="${data_dir}/pdb_mmcif/mmcif_files" \ |
|
53 | ---obsolete_pdbs_path="$data_dir/pdb_mmcif/obsolete.dat" \ |
|
54 | ---max_template_date="2020-05-14" \ |
|
55 | ---preset="full_dbs" \ |
|
56 | ---output_dir="${output_dir}" \ |
|
57 | ---fasta_paths="${input_fasta}" |
|
94 | +args="$base_args $preset_args $database_args" |
|
95 | +alphafold_py="/programs/x86_64-linux/alphafold/2.0.0/bin.capsules/run_alphafold.py" |
|
96 | + |
|
97 | +if [ ! -f $alphafold_py ]; then |
|
98 | + echo "Error - $alphafold_py not found" |
|
99 | + exit 1 |
|
100 | +else |
|
101 | + echo "${alphafold_py} ${args}" |
|
102 | + ${alphafold_py} ${args} |
|
103 | +fi |