import sys
import subprocess
import re
import os
import warnings
from tempfile import mkdtemp
from shutil import rmtree
from distutils.spawn import find_executable
from tempfile import gettempdir
from six import string_types
import oddt
from oddt.utils import (is_openbabel_molecule,
is_molecule,
check_molecule)
from oddt.spatial import rmsd
[docs]class autodock_vina(object):
def __init__(self,
protein=None,
auto_ligand=None,
size=(20, 20, 20),
center=(0, 0, 0),
exhaustiveness=8,
num_modes=9,
energy_range=3,
seed=None,
prefix_dir=None,
n_cpu=1,
executable=None,
autocleanup=True,
skip_bad_mols=True):
"""Autodock Vina docking engine, which extends it's capabilities:
automatic box (auto-centering on ligand).
Parameters
----------
protein: oddt.toolkit.Molecule object (default=None)
Protein object to be used while generating descriptors.
auto_ligand: oddt.toolkit.Molecule object or string (default=None)
Ligand use to center the docking box. Either ODDT molecule or
a file (opened based on extesion and read to ODDT molecule).
Box is centered on geometric center of molecule.
size: tuple, shape=[3] (default=(20, 20, 20))
Dimentions of docking box (in Angstroms)
center: tuple, shape=[3] (default=(0,0,0))
The center of docking box in cartesian space.
exhaustiveness: int (default=8)
Exhaustiveness parameter of Autodock Vina
num_modes: int (default=9)
Number of conformations generated by Autodock Vina. The maximum
number of docked poses is 9 (due to Autodock Vina limitation).
energy_range: int (default=3)
Energy range cutoff for Autodock Vina
seed: int or None (default=None)
Random seed for Autodock Vina
prefix_dir: string or None (default=None)
Temporary directory for Autodock Vina files.
By default (None) system temporary directory is used,
for reference see `tempfile.gettempdir`.
executable: string or None (default=None)
Autodock Vina executable location in the system.
It's realy necessary if autodetection fails.
autocleanup: bool (default=True)
Should the docking engine clean up after execution?
skip_bad_mols: bool (default=True)
Should molecules that crash Autodock Vina be skipped.
"""
self.dir = prefix_dir or gettempdir()
self._tmp_dir = None
# define binding site
self.size = size
self.center = center
# center automaticaly on ligand
if auto_ligand:
if isinstance(auto_ligand, string_types):
extension = auto_ligand.split('.')[-1]
auto_ligand = next(oddt.toolkit.readfile(extension, auto_ligand))
self.center = auto_ligand.coords.mean(axis=0).round(3)
# autodetect Vina executable
if not executable:
self.executable = find_executable('vina')
if not self.executable:
raise Exception('Could not find Autodock Vina binary.'
'You have to install it globaly or supply binary'
'full directory via `executable` parameter.')
else:
self.executable = executable
# detect version
self.version = (subprocess.check_output([self.executable, '--version'])
.decode('ascii').split(' ')[2])
self.autocleanup = autocleanup
self.cleanup_dirs = set()
# share protein to class
self.protein = None
self.protein_file = None
if protein:
self.set_protein(protein)
self.skip_bad_mols = skip_bad_mols
self.n_cpu = n_cpu
if self.n_cpu > exhaustiveness:
warnings.warn('Exhaustiveness is lower than n_cpus, thus CPU will '
'not be saturated.')
# pregenerate common Vina parameters
self.params = []
self.params += ['--center_x', str(self.center[0]),
'--center_y', str(self.center[1]),
'--center_z', str(self.center[2])]
self.params += ['--size_x', str(self.size[0]),
'--size_y', str(self.size[1]),
'--size_z', str(self.size[2])]
self.params += ['--exhaustiveness', str(exhaustiveness)]
if seed is not None:
self.params += ['--seed', str(seed)]
if num_modes > 9 or num_modes < 1:
raise ValueError('The number of docked poses must be between 1 and 9'
' (due to Autodock Vina limitation).')
self.params += ['--num_modes', str(num_modes)]
self.params += ['--energy_range', str(energy_range)]
@property
def tmp_dir(self):
if not self._tmp_dir:
self._tmp_dir = mkdtemp(dir=self.dir, prefix='autodock_vina_')
self.cleanup_dirs.add(self._tmp_dir)
return self._tmp_dir
@tmp_dir.setter
def tmp_dir(self, value):
self._tmp_dir = value
[docs] def set_protein(self, protein):
"""Change protein to dock to.
Parameters
----------
protein: oddt.toolkit.Molecule object
Protein object to be used.
"""
# generate new directory
self._tmp_dir = None
if protein:
if isinstance(protein, string_types):
extension = protein.split('.')[-1]
if extension == 'pdbqt':
self.protein_file = protein
self.protein = next(oddt.toolkit.readfile(extension, protein))
self.protein.protein = True
else:
self.protein = next(oddt.toolkit.readfile(extension, protein))
self.protein.protein = True
else:
self.protein = protein
# skip writing if we have PDBQT protein
if self.protein_file is None:
self.protein_file = write_vina_pdbqt(self.protein, self.tmp_dir,
flexible=False)
[docs] def score(self, ligands, protein=None):
"""Automated scoring procedure.
Parameters
----------
ligands: iterable of oddt.toolkit.Molecule objects
Ligands to score
protein: oddt.toolkit.Molecule object or None
Protein object to be used. If None, then the default
one is used, else the protein is new default.
Returns
-------
ligands : array of oddt.toolkit.Molecule objects
Array of ligands (scores are stored in mol.data method)
"""
if protein:
self.set_protein(protein)
if not self.protein_file:
raise IOError("No receptor.")
if is_molecule(ligands):
ligands = [ligands]
ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
output_array = []
for n, ligand in enumerate(ligands):
check_molecule(ligand, force_coords=True)
ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
try:
scores = parse_vina_scoring_output(
subprocess.check_output([self.executable, '--score_only',
'--receptor', self.protein_file,
'--ligand', ligand_file] + self.params,
stderr=subprocess.STDOUT))
except subprocess.CalledProcessError as e:
sys.stderr.write(e.output.decode('ascii'))
if self.skip_bad_mols:
continue
else:
raise Exception('Autodock Vina failed. Command: "%s"' %
' '.join(e.cmd))
ligand.data.update(scores)
output_array.append(ligand)
rmtree(ligand_dir)
return output_array
[docs] def dock(self, ligands, protein=None):
"""Automated docking procedure.
Parameters
----------
ligands: iterable of oddt.toolkit.Molecule objects
Ligands to dock
protein: oddt.toolkit.Molecule object or None
Protein object to be used. If None, then the default one
is used, else the protein is new default.
Returns
-------
ligands : array of oddt.toolkit.Molecule objects
Array of ligands (scores are stored in mol.data method)
"""
if protein:
self.set_protein(protein)
if not self.protein_file:
raise IOError("No receptor.")
if is_molecule(ligands):
ligands = [ligands]
ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
output_array = []
for n, ligand in enumerate(ligands):
check_molecule(ligand, force_coords=True)
ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
try:
scores = parse_vina_docking_output(
subprocess.check_output([self.executable, '--receptor',
self.protein_file,
'--ligand', ligand_file,
'--out', ligand_outfile] +
self.params +
['--cpu', str(self.n_cpu)],
stderr=subprocess.STDOUT))
except subprocess.CalledProcessError as e:
sys.stderr.write(e.output.decode('ascii'))
if self.skip_bad_mols:
continue # TODO: print some warning message
else:
raise Exception('Autodock Vina failed. Command: "%s"' %
' '.join(e.cmd))
# docked conformations may have wrong connectivity - use source ligand
if is_openbabel_molecule(ligand):
# find the order of PDBQT atoms assigned by OpenBabel
with open(ligand_file) as f:
write_order = [int(line[7:12].strip())
for line in f
if line[:4] == 'ATOM']
new_order = sorted(range(len(write_order)),
key=write_order.__getitem__)
new_order = [i + 1 for i in new_order] # OBMol has 1 based idx
assert len(new_order) == len(ligand.atoms)
docked_ligands = oddt.toolkit.readfile('pdbqt', ligand_outfile)
for docked_ligand, score in zip(docked_ligands, scores):
# Renumber atoms to match the input ligand
if is_openbabel_molecule(docked_ligand):
docked_ligand.OBMol.RenumberAtoms(new_order)
# HACK: copy docked coordinates onto source ligand
# We assume that the order of atoms match between ligands
clone = ligand.clone
clone.clone_coords(docked_ligand)
clone.data.update(score)
# Calculate RMSD to the input pose
try:
clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
clone.data['vina_rmsd_input_min'] = rmsd(ligand, clone,
method='min_symmetry')
except Exception:
pass
output_array.append(clone)
rmtree(ligand_dir)
return output_array
[docs] def clean(self):
for d in self.cleanup_dirs:
rmtree(d)
[docs] def predict_ligand(self, ligand):
"""Local method to score one ligand and update it's scores.
Parameters
----------
ligand: oddt.toolkit.Molecule object
Ligand to be scored
Returns
-------
ligand: oddt.toolkit.Molecule object
Scored ligand with updated scores
"""
return self.score([ligand])[0]
[docs] def predict_ligands(self, ligands):
"""Method to score ligands lazily
Parameters
----------
ligands: iterable of oddt.toolkit.Molecule objects
Ligands to be scored
Returns
-------
ligand: iterator of oddt.toolkit.Molecule objects
Scored ligands with updated scores
"""
return self.score(ligands)
[docs]def write_vina_pdbqt(mol, directory, flexible=True, name_id=None):
"""Write single PDBQT molecule to a given directory. For proteins use
`flexible=False` to avoid encoding torsions. Additionally an name ID can
be appended to a name to avoid conflicts.
"""
if name_id is None:
name_id = ''
# We expect name such as 0_ZINC123456.pdbqt or simply ZINC123456.pdbqt if no
# name_id is specified. All non alpha-numeric signs are replaced with underscore.
mol_file = ('_'.join(filter(None, [str(name_id),
re.sub('[^A-Za-z0-9]+', '_', mol.title)]
)) + '.pdbqt')
# prepend path to filename
mol_file = os.path.join(directory, mol_file)
if is_openbabel_molecule(mol):
if flexible:
# auto bonding (b), perserve atom indices (p) and Hs (h)
kwargs = {'opt': {'b': None, 'p': None, 'h': None}}
else:
# for proteins write rigid mol (r) and combine all frags in one (c)
kwargs = {'opt': {'r': None, 'c': None, 'h': None}}
else:
kwargs = {'flexible': flexible}
mol.write('pdbqt', mol_file, overwrite=True, **kwargs)
return mol_file
[docs]def parse_vina_scoring_output(output):
"""Function parsing Autodock Vina scoring output to a dictionary
Parameters
----------
output : string
Autodock Vina standard ouptud (STDOUT).
Returns
-------
out : dict
dicitionary containing scores computed by Autodock Vina
"""
out = {}
r = re.compile(r'^(Affinity:|\s{4})')
for line in output.decode('ascii').split('\n')[13:]: # skip some output
if r.match(line):
m = line.replace(' ', '').split(':')
if m[0] == 'Affinity':
m[1] = m[1].replace('(kcal/mol)', '')
out[str('vina_' + m[0].lower())] = float(m[1])
return out
[docs]def parse_vina_docking_output(output):
"""Function parsing Autodock Vina docking output to a dictionary
Parameters
----------
output : string
Autodock Vina standard ouptud (STDOUT).
Returns
-------
out : dict
dicitionary containing scores computed by Autodock Vina
"""
out = []
r = re.compile(r'^\s+\d\s+')
for line in output.decode('ascii').split('\n')[13:]: # skip some output
if r.match(line):
s = line.split()
out.append({'vina_affinity': s[1],
'vina_rmsd_lb': s[2],
'vina_rmsd_ub': s[3]})
return out