from tempfile import mkdtemp
from shutil import rmtree
import sys
import six
import subprocess
import numpy as np
import re
from oddt import toolkit
[docs]class autodock_vina(object):
def __init__(self,
protein=None,
auto_ligand=None,
size=(10, 10, 10),
center=(0, 0, 0),
exhaustiveness=8,
num_modes=9,
energy_range=3,
seed=None,
prefix_dir='/tmp',
n_cpu=1,
executable=None,
autocleanup=True,
skip_bad_mols=True):
"""Autodock Vina docking engine, which extends it's capabilities:
automatic box (auto-centering on ligand).
Parameters
----------
protein: oddt.toolkit.Molecule object (default=None)
Protein object to be used while generating descriptors.
auto_ligand: oddt.toolkit.Molecule object or string (default=None)
Ligand use to center the docking box. Either ODDT molecule or
a file (opened based on extesion and read to ODDT molecule).
Box is centered on geometric center of molecule.
size: tuple, shape=[3] (default=(10,10,10))
Dimentions of docking box (in Angstroms)
center: tuple, shape=[3] (default=(0,0,0))
The center of docking box in cartesian space.
exhaustiveness: int (default=8)
Exhaustiveness parameter of Autodock Vina
num_modes: int (default=9)
Number of conformations generated by Autodock Vina
energy_range: int (default=3)
Energy range cutoff for Autodock Vina
seed: int or None (default=None)
Random seed for Autodock Vina
prefix_dir: string (default=/tmp)
Temporary directory for Autodock Vina files
executable: string or None (default=None)
Autodock Vina executable location in the system.
It's realy necessary if autodetection fails.
autocleanup: bool (default=True)
Should the docking engine clean up after execution?
skip_bad_mols: bool (default=True)
Should molecules that crash Autodock Vina be skipped.
"""
self.dir = prefix_dir
self._tmp_dir = None
# define binding site
self.size = size
self.center = center
# center automaticaly on ligand
if auto_ligand:
if type(auto_ligand) is str:
extension = auto_ligand.split('.')[-1]
auto_ligand = six.next(toolkit.readfile(extension, auto_ligand))
self.center = tuple(np.array([atom.coords for atom in auto_ligand],
dtype=np.float32).mean(axis=0))
# autodetect Vina executable
if not executable:
try:
self.executable = (subprocess.check_output(['which', 'vina'])
.decode('ascii').split('\n')[0])
except subprocess.CalledProcessError:
raise Exception('Could not find Autodock Vina binary.'
'You have to install it globaly or supply binary'
'full directory via `executable` parameter.')
else:
self.executable = executable
# detect version
self.version = (subprocess.check_output([self.executable, '--version'])
.decode('ascii').split(' ')[2])
self.autocleanup = autocleanup
self.cleanup_dirs = set()
# share protein to class
self.protein = None
self.protein_file = None
if protein:
self.set_protein(protein)
self.skip_bad_mols = skip_bad_mols
# pregenerate common Vina parameters
self.params = []
self.params += ['--center_x', str(self.center[0]),
'--center_y', str(self.center[1]),
'--center_z', str(self.center[2])]
self.params += ['--size_x', str(self.size[0]),
'--size_y', str(self.size[1]),
'--size_z', str(self.size[2])]
if n_cpu > 0:
self.params += ['--cpu', str(n_cpu)]
self.params += ['--exhaustiveness', str(exhaustiveness)]
if seed is not None:
self.params += ['--seed', str(seed)]
self.params += ['--num_modes', str(num_modes)]
self.params += ['--energy_range', str(energy_range)]
@property
def tmp_dir(self):
if not self._tmp_dir:
self._tmp_dir = mkdtemp(dir=self.dir, prefix='autodock_vina_')
self.cleanup_dirs.add(self._tmp_dir)
return self._tmp_dir
@tmp_dir.setter
def tmp_dir(self, value):
self._tmp_dir = value
[docs] def set_protein(self, protein):
"""Change protein to dock to.
Parameters
----------
protein: oddt.toolkit.Molecule object
Protein object to be used.
"""
# generate new directory
self._tmp_dir = None
if protein:
self.protein = protein
if type(protein) is str:
extension = protein.split('.')[-1]
if extension == 'pdbqt':
self.protein_file = protein
self.protein = six.next(toolkit.readfile(extension, protein))
else:
self.protein = six.next(toolkit.readfile(extension, protein))
self.protein.protein = True
self.protein_file = self.tmp_dir + '/protein.pdbqt'
self.protein.write('pdbqt', self.protein_file, opt={'r': None, 'c': None}, overwrite=True)
else:
# write protein to file
self.protein_file = self.tmp_dir + '/protein.pdbqt'
self.protein.write('pdbqt', self.protein_file, opt={'r': None, 'c': None}, overwrite=True)
[docs] def score(self, ligands, protein=None, single=False):
"""Automated scoring procedure.
Parameters
----------
ligands: iterable of oddt.toolkit.Molecule objects
Ligands to score
protein: oddt.toolkit.Molecule object or None
Protein object to be used. If None, then the default
one is used, else the protein is new default.
single: bool (default=False)
A flag to indicate single ligand scoring - performance reasons
(eg. there is no need for subdirectory for one ligand)
Returns
-------
ligands : array of oddt.toolkit.Molecule objects
Array of ligands (scores are stored in mol.data method)
"""
if protein:
self.set_protein(protein)
if not self.protein_file:
raise IOError("No receptor.")
if single:
ligands = [ligands]
ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
output_array = []
for n, ligand in enumerate(ligands):
# write ligand to file
ligand_file = ligand_dir + '/' + str(n) + '_' + re.sub('[^A-Za-z0-9]+', '_', ligand.title) + '.pdbqt'
ligand.write('pdbqt', ligand_file, overwrite=True, opt={'b': None})
try:
scores = parse_vina_scoring_output(subprocess.check_output([self.executable,
'--score_only',
'--receptor',
self.protein_file,
'--ligand',
ligand_file] + self.params,
stderr=subprocess.STDOUT))
except subprocess.CalledProcessError as e:
sys.stderr.write(e.output)
if self.skip_bad_mols:
continue
else:
raise Exception('Autodock Vina failed. Command: "%s"' % ' '.join(e.cmd))
ligand.data.update(scores)
output_array.append(ligand)
rmtree(ligand_dir)
return output_array
[docs] def dock(self, ligands, protein=None, single=False):
"""Automated docking procedure.
Parameters
----------
ligands: iterable of oddt.toolkit.Molecule objects
Ligands to dock
protein: oddt.toolkit.Molecule object or None
Protein object to be used. If None, then the default one
is used, else the protein is new default.
single: bool (default=False)
A flag to indicate single ligand docking - performance reasons
(eg. there is no need for subdirectory for one ligand)
Returns
-------
ligands : array of oddt.toolkit.Molecule objects
Array of ligands (scores are stored in mol.data method)
"""
if protein:
self.set_protein(protein)
if not self.protein_file:
raise IOError("No receptor.")
if single:
ligands = [ligands]
ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
output_array = []
for n, ligand in enumerate(ligands):
# write ligand to file
ligand_file = ligand_dir + '/' + str(n) + '_' + re.sub('[^A-Za-z0-9]+', '_', ligand.title) + '.pdbqt'
ligand_outfile = ligand_dir + '/' + str(n) + '_' + re.sub('[^A-Za-z0-9]+', '_', ligand.title) + '_out.pdbqt'
ligand.write('pdbqt', ligand_file, overwrite=True, opt={'b': None})
try:
vina = parse_vina_docking_output(subprocess.check_output([self.executable,
'--receptor',
self.protein_file,
'--ligand', ligand_file,
'--out', ligand_outfile] + self.params,
stderr=subprocess.STDOUT))
except subprocess.CalledProcessError as e:
sys.stderr.write(e.output.decode('ascii'))
if self.skip_bad_mols:
continue
else:
raise Exception('Autodock Vina failed. Command: "%s"' % ' '.join(e.cmd))
# HACK # overcome connectivity problems in obabel
source_ligand = six.next(toolkit.readfile('pdbqt', ligand_file))
del source_ligand.data['REMARK']
for lig, scores in zip([lig for lig in toolkit.readfile('pdbqt', ligand_outfile, opt={'b': None})], vina):
# HACK # copy data from source
clone = source_ligand.clone
clone.clone_coords(lig)
clone.data.update(scores)
output_array.append(clone)
rmtree(ligand_dir)
return output_array
[docs] def clean(self):
for d in self.cleanup_dirs:
rmtree(d)
[docs] def predict_ligand(self, ligand):
"""Local method to score one ligand and update it's scores.
Parameters
----------
ligand: oddt.toolkit.Molecule object
Ligand to be scored
Returns
-------
ligand: oddt.toolkit.Molecule object
Scored ligand with updated scores
"""
return self.score([ligand])[0]
[docs] def predict_ligands(self, ligands):
"""Method to score ligands lazily
Parameters
----------
ligands: iterable of oddt.toolkit.Molecule objects
Ligands to be scored
Returns
-------
ligand: iterator of oddt.toolkit.Molecule objects
Scored ligands with updated scores
"""
return self.score(ligands)
[docs]def parse_vina_scoring_output(output):
"""Function parsing Autodock Vina scoring output to a dictionary
Parameters
----------
output : string
Autodock Vina standard ouptud (STDOUT).
Returns
-------
out : dict
dicitionary containing scores computed by Autodock Vina
"""
out = {}
r = re.compile('^(Affinity:|\s{4})')
for line in output.decode('ascii').split('\n')[13:]: # skip some output
if r.match(line):
m = line.replace(' ', '').split(':')
if m[0] == 'Affinity':
m[1] = m[1].replace('(kcal/mol)', '')
out['vina_' + m[0].lower()] = float(m[1])
return out
[docs]def parse_vina_docking_output(output):
"""Function parsing Autodock Vina docking output to a dictionary
Parameters
----------
output : string
Autodock Vina standard ouptud (STDOUT).
Returns
-------
out : dict
dicitionary containing scores computed by Autodock Vina
"""
out = []
r = re.compile('^\s+\d\s+')
for line in output.decode('ascii').split('\n')[13:]: # skip some output
if r.match(line):
s = line.split()
out.append({'vina_affinity': s[1], 'vina_rmsd_lb': s[2], 'vina_rmsd_ub': s[3]})
return out