examples/run_alphafold_template.sh
... ...
@@ -1,57 +1,103 @@
1 1
#!/bin/bash
2
-# Jason Key 1fac4ac 2021-07-23 08:33:47 -0400
2
+# Jason Key 9e4d1a4 2021-07-29 11:04:26 -0400
3 3
# Copyright © 2021 SBGrid Consortium. All rights reserved.
4 4
#
5
-# wrapper script for alphafold
5
+# Wrapper script for alphafold
6
+#
7
+#
8
+# To set single GPU
9
+# export CUDA_VISIBLE_DEVICES=0
10
+#
11
+# Useful variables used by this script :
12
+#
13
+# ALPHAFOLD_DB : path to the database directory (default: /programs/local/alphafold/)
14
+# ALPHAFOLD_PTM : Use pTM models when set
15
+# ALPHAFOLD_PRESET : database preset, reduced_dbs or casp14 (default: full_dbs)
16
+# ALPHAFOLD_TEMPLATE : date string for limiting template search
6 17
7
-USAGE="$(basename $0): A wrapper script for running Alphafold jobs in the SBGrid Software installation \n\n
8
-Usage: $(basename $0) [fasta file] [path to output directory] \n\n"
18
+USAGE="$(basename "$0"): A wrapper script for running Alphafold jobs in the SBGrid Software installation
19
+Usage: $(basename "$0") [fasta file] [path to output directory]"
9 20
10 21
if [ $# -eq 0 ]; then
11
- echo -e $USAGE
12
- exit 1
22
+ echo "$USAGE" # quoted so the line break embedded in USAGE is preserved
23
+ exit 1
13 24
fi
14 25
15
-## To set single GPU
16
-# export CUDA_VISIBLE_DEVICES=0
26
+input_fasta=$1 # first argument: the fasta file
27
+if [ ! -f "${input_fasta}" ]; then # quoted: an empty value or a path with spaces must fail the check, not slip past it
28
+ echo "${input_fasta} is not a file. Exiting... "
29
+ exit 1
30
+fi
31
+
32
+# Output directory
33
+output_dir="/tmp/alphafold"
34
+if [ -n "${2:-}" ]; then # a non-empty second argument overrides the default
35
+ output_dir=$2
36
+fi
37
+mkdir -p "${output_dir}" || exit 1 # stop early when the output directory cannot be created
17 38
39
+## ALPHAFOLD databases
18 40
data_dir="/programs/local/alphafold/"
19
-if [ ! -z ${ALPHAFOLD_DB} ] && [ -d ${ALPHAFOLD_DB} ] ; then
20
- data_dir=${ALPHAFOLD_DB}
41
+if [ -n "${ALPHAFOLD_DB:-}" ] && [ -d "${ALPHAFOLD_DB}" ]; then # override only when it names an existing directory
42
+ data_dir=${ALPHAFOLD_DB}
21 43
fi
22 44
23
-if [ ! -d ${data_dir} ] ; then
45
+if [ ! -d "${data_dir}" ]; then # quoted: a path with spaces must be tested as one word
24 46
echo "${data_dir} is not a directory. Exiting... "
25 47
exit 1
48
+else
49
+ echo "Using databases in ${data_dir}"
26 50
fi
27 51
28
-echo "Using databases in ${data_dir}"
29
-
30
-input_fasta=$1
52
+# Set pTM models
53
+if [ -n "${ALPHAFOLD_PTM:-}" ]; then # any non-empty value selects the pTM model set
54
+ model_names="model_1_ptm,model_2_ptm,model_3_ptm,model_4_ptm,model_5_ptm"
55
+ echo -e "Using pTM models \n"
56
+else
57
+ model_names="model_1,model_2,model_3,model_4,model_5"
58
+fi
31 59
32
-if [ ! -f ${input_fasta} ] ; then
33
- echo "${input_fasta} is not a file. Exiting... "
34
- exit 1
60
+# Filter by date
61
+if [ -n "${ALPHAFOLD_TEMPLATE:-}" ]; then # a non-empty value replaces the built-in date
62
+ template_date=${ALPHAFOLD_TEMPLATE}
63
+ echo -e "Using max-template-date $template_date \n"
64
+else
65
+ template_date="2020-05-14"
35 66
fi
36 67
37
-output_dir="/tmp/alphafold"
38
-if [ $2 ] ; then
39
- output_dir=$2
68
+# database presets
69
+if [ "${ALPHAFOLD_PRESET:-}" = 'reduced_dbs' ]; then # quoted comparison also covers the unset/empty case
70
+ preset_args="--preset=reduced_dbs --small_bfd_database_path=${data_dir}/small_bfd"
71
+ echo -e "Using --preset=reduced_dbs \n"
72
+elif [ "${ALPHAFOLD_PRESET:-}" = 'casp14' ]; then
73
+ preset_args="--preset=casp14 \
74
+--bfd_database_path=${data_dir}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
75
+--uniclust30_database_path=${data_dir}/uniclust30/uniclust30_2018_08/uniclust30_2018_08 "
76
+ echo -e "Using --preset=casp14 \n"
77
+# default is full_dbs
78
+else
79
+ preset_args="--preset=full_dbs \
80
+--bfd_database_path=${data_dir}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
81
+--uniclust30_database_path=${data_dir}/uniclust30/uniclust30_2018_08/uniclust30_2018_08 "
40 82
fi
41 83
42
-mkdir -p "${output_dir}"
84
+##
85
+base_args=(--data_dir="${data_dir}" --output_dir="${output_dir}" --fasta_paths="${input_fasta}"
86
+ --model_names="${model_names}" --max_template_date="${template_date}")
87
+
88
+database_args=(--uniref90_database_path="${data_dir}/uniref90/uniref90.fasta"
89
+ --mgnify_database_path="${data_dir}/mgnify/mgy_clusters.fa"
90
+ --pdb70_database_path="${data_dir}/pdb70/pdb70"
91
+ --template_mmcif_dir="${data_dir}/pdb_mmcif/mmcif_files"
92
+ --obsolete_pdbs_path="${data_dir}/pdb_mmcif/obsolete.dat")
43 93
44
-/programs/x86_64-linux/alphafold/2.0.0/bin.capsules/run_alphafold.py \
45
---model_names="model_1,model_2,model_3,model_4,model_5" \
46
---data_dir="${data_dir}" \
47
---uniref90_database_path="${data_dir}/uniref90/uniref90.fasta" \
48
---mgnify_database_path="${data_dir}/mgnify/mgy_clusters.fa" \
49
---uniclust30_database_path="${data_dir}/uniclust30/uniclust30_2018_08/uniclust30_2018_08" \
50
---bfd_database_path="${data_dir}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" \
51
---pdb70_database_path="${data_dir}/pdb70/pdb70" \
52
---template_mmcif_dir="${data_dir}/pdb_mmcif/mmcif_files" \
53
---obsolete_pdbs_path="$data_dir/pdb_mmcif/obsolete.dat" \
54
---max_template_date="2020-05-14" \
55
---preset="full_dbs" \
56
---output_dir="${output_dir}" \
57
---fasta_paths="${input_fasta}"
94
+args=("${base_args[@]}" ${preset_args} "${database_args[@]}") # preset_args is a plain string, so it is split on whitespace on purpose
95
+alphafold_py="/programs/x86_64-linux/alphafold/2.0.0/bin.capsules/run_alphafold.py"
96
+
97
+if [ ! -f "${alphafold_py}" ]; then
98
+ echo "Error - $alphafold_py not found"
99
+ exit 1
100
+else
101
+ echo "${alphafold_py} ${args[*]}" # show the command line before running it
102
+ "${alphafold_py}" "${args[@]}" # each array element is passed as exactly one argument
103
+fi