Source code for oddt.docking.AutodockVina

from tempfile import mkdtemp
from shutil import rmtree
from os.path import exists
from os import remove
import subprocess
import numpy as np
import re
from random import random
from oddt import toolkit

[docs]class autodock_vina(object): def __init__(self, protein=None, auto_ligand=None, size=(10,10,10), center=(0,0,0), exhaustivness=8, num_modes=9, energy_range=3, seed=None, prefix_dir='/tmp', n_cpu=1, executable=None, autocleanup=True): """Autodock Vina docking engine, which extends it's capabilities: automatic box (autocentering on ligand). Parameters ---------- protein: oddt.toolkit.Molecule object (default=None) Protein object to be used while generating descriptors. auto_ligand: oddt.toolkit.Molecule object or string (default=None) Ligand use to center the docking box. Either ODDT molecule or a file (opened based on extesion and read to ODDT molecule). Box is centered on geometric center of molecule. size: tuple, shape=[3] (default=(10,10,10)) Dimentions of docking box (in Angstroms) center: tuple, shape=[3] (default=(0,0,0)) The center of docking box in cartesian space. exhaustiveness: int (default=8) Exhaustiveness parameter of Autodock Vina num_modes: int (default=9) Number of conformations generated by Autodock Vina energy_range: int (default=3) Energy range cutoff for Autodock Vina seed: int or None (default=None) Random seed for Autodock Vina prefix_dir: string (default=/tmp) Temporary directory for Autodock Vina files executable: string or None (default=None) Autodock Vina executable location in the system. It's realy necessary if autodetection fails. autocleanup: bool (default=True) Should the docking engine clean up after execution? """ self.dir = prefix_dir self._tmp_dir = None # define binding site self.size = size self.center = center # center automaticaly on ligand if auto_ligand: if type(auto_ligand) is str: extension = auto_ligand.split('.')[-1] auto_ligand = toolkit.readfile(extension, auto_ligand).next() self.center = tuple(np.array([atom.coords for atom in auto_ligand], dtype=np.float32).mean(axis=0)) # autodetect Vina executable if not executable: try: self.executable = subprocess.check_output(['which', 'vina']).split('\n')[0] except subprocess.CalledProcessError: raise Exception('Could not find Autodock Vina binary. You have to install it globaly or supply binary full directory via `executable` parameter.') else: self.executable = executable # detect version self.version = subprocess.check_output([self.executable, '--version']).split(' ')[2] self.autocleanup = autocleanup self.cleanup_dirs = set() # share protein to class if protein: self.set_protein(protein) #pregenerate common Vina parameters self.params = [] self.params += ['--center_x', str(self.center[0]), '--center_y', str(self.center[1]), '--center_z', str(self.center[2])] self.params += ['--size_x', str(self.size[0]), '--size_y', str(self.size[1]), '--size_z', str(self.size[2])] if n_cpu > 0: self.params += ['--cpu', str(n_cpu)] self.params += ['--exhaustiveness', str(exhaustivness)] if not seed is None: self.params += ['--seed', str(seed)] self.params += ['--num_modes', str(num_modes)] self.params += ['--energy_range', str(energy_range)] @property def tmp_dir(self): if not self._tmp_dir: self._tmp_dir = mkdtemp(dir = self.dir, prefix='autodock_vina_') self.cleanup_dirs.add(self._tmp_dir) return self._tmp_dir @tmp_dir.setter def tmp_dir(self, value): self._tmp_dir = value
[docs] def set_protein(self, protein): """Change protein to dock to. Parameters ---------- protein: oddt.toolkit.Molecule object Protein object to be used. """ # generate new directory self._tmp_dir = None if protein: self.protein = protein if type(protein) is str: extension = protein.split('.')[-1] if extension == 'pdbqt': self.protein_file = protein self.protein = toolkit.readfile(extension, protein).next() else: self.protein = toolkit.readfile(extension, protein).next() self.protein.protein = True self.protein_file = self.tmp_dir + '/protein.pdbqt' self.protein.write('pdbqt', self.protein_file, opt={'r':None, 'c':None}, overwrite=True) else: # write protein to file self.protein_file = self.tmp_dir + '/protein.pdbqt' self.protein.write('pdbqt', self.protein_file, opt={'r':None, 'c':None}, overwrite=True)
[docs] def score(self, ligands, protein = None, single = False): """Automated scoring procedure. Parameters ---------- ligands: iterable of oddt.toolkit.Molecule objects Ligands to score protein: oddt.toolkit.Molecule object or None Protein object to be used. If None, then the default one is used, else the protein is new default. single: bool (default=False) A flag to indicate single ligand scoring (performance reasons (eg. there is no need for subdirectory for one ligand) Returns ------- ligands : array of oddt.toolkit.Molecule objects Array of ligands (scores are stored in mol.data method) """ if protein: self.set_protein(protein) if single: ligands = [ligands] ligand_dir = mkdtemp(dir = self.tmp_dir, prefix='ligands_') output_array = [] for n, ligand in enumerate(ligands): # write ligand to file ligand_file = ligand_dir + '/' + str(n) + '_' + re.sub('[^A-Za-z0-9]+', '_', ligand.title) + '.pdbqt' ligand.write('pdbqt', ligand_file, overwrite=True) scores = parse_vina_scoring_output(subprocess.check_output([self.executable, '--score_only', '--receptor', self.protein_file, '--ligand', ligand_file] + self.params, stderr=subprocess.STDOUT)) ligand.data.update(scores) output_array.append(ligand) rmtree(ligand_dir) return output_array
[docs] def dock(self, ligands, protein = None, single = False): """Automated docking procedure. Parameters ---------- ligands: iterable of oddt.toolkit.Molecule objects Ligands to dock protein: oddt.toolkit.Molecule object or None Protein object to be used. If None, then the default one is used, else the protein is new default. single: bool (default=False) A flag to indicate single ligand docking (performance reasons (eg. there is no need for subdirectory for one ligand) Returns ------- ligands : array of oddt.toolkit.Molecule objects Array of ligands (scores are stored in mol.data method) """ if protein: self.set_protein(protein) if single: ligands = [ligands] ligand_dir = mkdtemp(dir = self.tmp_dir, prefix='ligands_') output_array = [] for n, ligand in enumerate(ligands): # write ligand to file ligand_file = ligand_dir + '/' + str(n) + '_' + re.sub('[^A-Za-z0-9]+', '_', ligand.title) + '.pdbqt' ligand_outfile = ligand_dir + '/' + str(n) + '_' + re.sub('[^A-Za-z0-9]+', '_', ligand.title) + '_out.pdbqt' ligand.write('pdbqt', ligand_file, overwrite=True) vina = parse_vina_docking_output(subprocess.check_output([self.executable, '--receptor', self.protein_file, '--ligand', ligand_file, '--out', ligand_outfile] + self.params, stderr=subprocess.STDOUT)) ### HACK # overcome connectivity problems in obabel source_ligand = toolkit.readfile('pdbqt', ligand_file).next() for lig, scores in zip([lig for lig in toolkit.readfile('pdbqt', ligand_outfile, opt={'b': None})], vina): ### HACK # copy data from source clone = source_ligand.clone clone.clone_coords(lig) clone.data.update(scores) output_array.append(clone) rmtree(ligand_dir) return output_array
[docs] def clean(self): for d in self.cleanup_dirs: rmtree(d)
[docs]def parse_vina_scoring_output(output): """Function parsing Autodock Vina scoring output to a dictionary Parameters ---------- output : string Autodock Vina standard ouptud (STDOUT). Returns ------- out : dict dicitionary containing scores computed by Autodock Vina """ out = {} r = re.compile('^(Affinity:|\s{4})') for line in output.split('\n')[13:]: # skip some output if r.match(line): m = line.replace(' ','').split(':') if m[0] == 'Affinity': m[1] = m[1].replace('(kcal/mol)','') out['vina_'+m[0].lower()] = float(m[1]) return out
[docs]def parse_vina_docking_output(output): """Function parsing Autodock Vina docking output to a dictionary Parameters ---------- output : string Autodock Vina standard ouptud (STDOUT). Returns ------- out : dict dicitionary containing scores computed by Autodock Vina """ out = [] r = re.compile('^\s+\d\s+') for line in output.split('\n')[13:]: # skip some output if r.match(line): s = line.split() out.append({'vina_affinity': s[1], 'vina_rmsd_lb': s[2], 'vina_rmsd_ub': s[3]}) return out