from tempfile import mkdtemp
from shutil import rmtree
from os.path import exists
from os import remove
import subprocess
import numpy as np
import re
from random import random
from oddt import toolkit
[docs]class autodock_vina(object):
def __init__(self, protein=None, auto_ligand=None, size=(10,10,10), center=(0,0,0), exhaustiveness=8, num_modes=9, energy_range=3, seed=None, prefix_dir='/tmp', n_cpu=1, executable=None, autocleanup=True):
"""Autodock Vina docking engine, which extends it's capabilities: automatic box (autocentering on ligand).
Parameters
----------
protein: oddt.toolkit.Molecule object (default=None)
Protein object to be used while generating descriptors.
auto_ligand: oddt.toolkit.Molecule object or string (default=None)
Ligand use to center the docking box. Either ODDT molecule or a file (opened based on extesion and read to ODDT molecule). Box is centered on geometric center of molecule.
size: tuple, shape=[3] (default=(10,10,10))
Dimentions of docking box (in Angstroms)
center: tuple, shape=[3] (default=(0,0,0))
The center of docking box in cartesian space.
exhaustiveness: int (default=8)
Exhaustiveness parameter of Autodock Vina
num_modes: int (default=9)
Number of conformations generated by Autodock Vina
energy_range: int (default=3)
Energy range cutoff for Autodock Vina
seed: int or None (default=None)
Random seed for Autodock Vina
prefix_dir: string (default=/tmp)
Temporary directory for Autodock Vina files
executable: string or None (default=None)
Autodock Vina executable location in the system. It's realy necessary if autodetection fails.
autocleanup: bool (default=True)
Should the docking engine clean up after execution?
"""
self.dir = prefix_dir
self._tmp_dir = None
# define binding site
self.size = size
self.center = center
# center automaticaly on ligand
if auto_ligand:
if type(auto_ligand) is str:
extension = auto_ligand.split('.')[-1]
auto_ligand = toolkit.readfile(extension, auto_ligand).next()
self.center = tuple(np.array([atom.coords for atom in auto_ligand], dtype=np.float32).mean(axis=0))
# autodetect Vina executable
if not executable:
try:
self.executable = subprocess.check_output(['which', 'vina']).split('\n')[0]
except subprocess.CalledProcessError:
raise Exception('Could not find Autodock Vina binary. You have to install it globaly or supply binary full directory via `executable` parameter.')
else:
self.executable = executable
# detect version
self.version = subprocess.check_output([self.executable, '--version']).split(' ')[2]
self.autocleanup = autocleanup
self.cleanup_dirs = set()
# share protein to class
if protein:
self.set_protein(protein)
#pregenerate common Vina parameters
self.params = []
self.params += ['--center_x', str(self.center[0]), '--center_y', str(self.center[1]), '--center_z', str(self.center[2])]
self.params += ['--size_x', str(self.size[0]), '--size_y', str(self.size[1]), '--size_z', str(self.size[2])]
if n_cpu > 0:
self.params += ['--cpu', str(n_cpu)]
self.params += ['--exhaustiveness', str(exhaustiveness)]
if not seed is None:
self.params += ['--seed', str(seed)]
self.params += ['--num_modes', str(num_modes)]
self.params += ['--energy_range', str(energy_range)]
@property
def tmp_dir(self):
if not self._tmp_dir:
self._tmp_dir = mkdtemp(dir = self.dir, prefix='autodock_vina_')
self.cleanup_dirs.add(self._tmp_dir)
return self._tmp_dir
@tmp_dir.setter
def tmp_dir(self, value):
self._tmp_dir = value
[docs] def set_protein(self, protein):
"""Change protein to dock to.
Parameters
----------
protein: oddt.toolkit.Molecule object
Protein object to be used.
"""
# generate new directory
self._tmp_dir = None
if protein:
self.protein = protein
if type(protein) is str:
extension = protein.split('.')[-1]
if extension == 'pdbqt':
self.protein_file = protein
self.protein = toolkit.readfile(extension, protein).next()
else:
self.protein = toolkit.readfile(extension, protein).next()
self.protein.protein = True
self.protein_file = self.tmp_dir + '/protein.pdbqt'
self.protein.write('pdbqt', self.protein_file, opt={'r':None, 'c':None}, overwrite=True)
else:
# write protein to file
self.protein_file = self.tmp_dir + '/protein.pdbqt'
self.protein.write('pdbqt', self.protein_file, opt={'r':None, 'c':None}, overwrite=True)
[docs] def score(self, ligands, protein = None, single = False):
"""Automated scoring procedure.
Parameters
----------
ligands: iterable of oddt.toolkit.Molecule objects
Ligands to score
protein: oddt.toolkit.Molecule object or None
Protein object to be used. If None, then the default one is used, else the protein is new default.
single: bool (default=False)
A flag to indicate single ligand scoring (performance reasons (eg. there is no need for subdirectory for one ligand)
Returns
-------
ligands : array of oddt.toolkit.Molecule objects
Array of ligands (scores are stored in mol.data method)
"""
if protein:
self.set_protein(protein)
if single:
ligands = [ligands]
ligand_dir = mkdtemp(dir = self.tmp_dir, prefix='ligands_')
output_array = []
for n, ligand in enumerate(ligands):
# write ligand to file
ligand_file = ligand_dir + '/' + str(n) + '_' + re.sub('[^A-Za-z0-9]+', '_', ligand.title) + '.pdbqt'
ligand.write('pdbqt', ligand_file, overwrite=True)
scores = parse_vina_scoring_output(subprocess.check_output([self.executable, '--score_only', '--receptor', self.protein_file, '--ligand', ligand_file] + self.params, stderr=subprocess.STDOUT))
ligand.data.update(scores)
output_array.append(ligand)
rmtree(ligand_dir)
return output_array
[docs] def dock(self, ligands, protein = None, single = False):
"""Automated docking procedure.
Parameters
----------
ligands: iterable of oddt.toolkit.Molecule objects
Ligands to dock
protein: oddt.toolkit.Molecule object or None
Protein object to be used. If None, then the default one is used, else the protein is new default.
single: bool (default=False)
A flag to indicate single ligand docking (performance reasons (eg. there is no need for subdirectory for one ligand)
Returns
-------
ligands : array of oddt.toolkit.Molecule objects
Array of ligands (scores are stored in mol.data method)
"""
if protein:
self.set_protein(protein)
if single:
ligands = [ligands]
ligand_dir = mkdtemp(dir = self.tmp_dir, prefix='ligands_')
output_array = []
for n, ligand in enumerate(ligands):
# write ligand to file
ligand_file = ligand_dir + '/' + str(n) + '_' + re.sub('[^A-Za-z0-9]+', '_', ligand.title) + '.pdbqt'
ligand_outfile = ligand_dir + '/' + str(n) + '_' + re.sub('[^A-Za-z0-9]+', '_', ligand.title) + '_out.pdbqt'
ligand.write('pdbqt', ligand_file, overwrite=True)
vina = parse_vina_docking_output(subprocess.check_output([self.executable, '--receptor', self.protein_file, '--ligand', ligand_file, '--out', ligand_outfile] + self.params, stderr=subprocess.STDOUT))
### HACK # overcome connectivity problems in obabel
source_ligand = toolkit.readfile('pdbqt', ligand_file).next()
for lig, scores in zip([lig for lig in toolkit.readfile('pdbqt', ligand_outfile, opt={'b': None})], vina):
### HACK # copy data from source
clone = source_ligand.clone
clone.clone_coords(lig)
clone.data.update(scores)
output_array.append(clone)
rmtree(ligand_dir)
return output_array
[docs] def clean(self):
for d in self.cleanup_dirs:
rmtree(d)
[docs] def predict_ligand(self, ligand):
"""Local method to score one ligand and update it's scores.
Parameters
----------
ligand: oddt.toolkit.Molecule object
Ligand to be scored
Returns
-------
ligand: oddt.toolkit.Molecule object
Scored ligand with updated scores
"""
return self.score([ligand])[0]
[docs] def predict_ligands(self, ligands):
"""Method to score ligands lazily
Parameters
----------
ligands: iterable of oddt.toolkit.Molecule objects
Ligands to be scored
Returns
-------
ligand: iterator of oddt.toolkit.Molecule objects
Scored ligands with updated scores
"""
return self.score(ligands)
[docs]def parse_vina_scoring_output(output):
"""Function parsing Autodock Vina scoring output to a dictionary
Parameters
----------
output : string
Autodock Vina standard ouptud (STDOUT).
Returns
-------
out : dict
dicitionary containing scores computed by Autodock Vina
"""
out = {}
r = re.compile('^(Affinity:|\s{4})')
for line in output.split('\n')[13:]: # skip some output
if r.match(line):
m = line.replace(' ','').split(':')
if m[0] == 'Affinity':
m[1] = m[1].replace('(kcal/mol)','')
out['vina_'+m[0].lower()] = float(m[1])
return out
[docs]def parse_vina_docking_output(output):
"""Function parsing Autodock Vina docking output to a dictionary
Parameters
----------
output : string
Autodock Vina standard ouptud (STDOUT).
Returns
-------
out : dict
dicitionary containing scores computed by Autodock Vina
"""
out = []
r = re.compile('^\s+\d\s+')
for line in output.split('\n')[13:]: # skip some output
if r.match(line):
s = line.split()
out.append({'vina_affinity': s[1], 'vina_rmsd_lb': s[2], 'vina_rmsd_ub': s[3]})
return out