Source code for oddt.scoring.descriptors

import numpy as np
from scipy.spatial.distance import cdist as distance

def atoms_by_type(atom_dict, types, mode = 'atomic_nums'):
    """Returns atom dictionaries based on given criteria. Currently we have 3 types of atom selection criteria:
        * atomic numbers ['atomic_nums']
        * Sybyl Atom Types ['atom_types_sybyl']
        * AutoDock4 atom types ['atom_types_ad4'] (http://autodock.scripps.edu/faqs-help/faq/where-do-i-set-the-autodock-4-force-field-parameters)

    Parameters
    ----------
        atom_dict: oddt.toolkit.Molecule.atom_dict
            Atom dictionary as implemented in oddt.toolkit.Molecule class

        types: array-like
            List of atom types/numbers wanted. Duplicates are collapsed.

        mode: string (default='atomic_nums')
            Selection criterion, one of 'atomic_nums', 'atom_types_sybyl'
            or 'atom_types_ad4'.

    Returns
    -------
        out: dictionary of shape=[len(set(types))]
            A dictionary of queried atom types (types are keys of the dictionary). Values are of oddt.toolkit.Molecule.atom_dict type.
            In 'atom_types_ad4' mode the keys are the upper-cased type names.

    Raises
    ------
        ValueError
            If an AutoDock4 atom type is not supported or `mode` is unknown.
    """
    if mode == 'atomic_nums':
        return {num: atom_dict[atom_dict['atomicnum'] == num] for num in set(types)}
    elif mode == 'atom_types_sybyl':
        return {t: atom_dict[atom_dict['atomtype'] == t] for t in set(types)}
    elif mode == 'atom_types_ad4':
        # AD4 type -> (atomic number, flag field or None, required flag value).
        ad4_rules = {
            'HD': (1, 'isdonorh', True),
            'C': (6, 'isaromatic', False),
            # not canonical AD4 type, although used by NNscore, with no
            # description. properties assumed by name
            'CD': (6, 'isdonor', False),
            'A': (6, 'isaromatic', True),
            'N': (7, 'isacceptor', False),
            'NA': (7, 'isacceptor', True),
            'OA': (8, 'isacceptor', True),
            'F': (9, None, None),
            'MG': (12, None, None),
            'P': (15, None, None),
            'SA': (16, 'isacceptor', True),
            'S': (16, 'isacceptor', False),
            'CL': (17, None, None),
            'CA': (20, None, None),
            'MN': (25, None, None),
            'FE': (26, None, None),
            'CU': (29, None, None),
            'ZN': (30, None, None),
            'BR': (35, None, None),
            'I': (53, None, None),
        }
        out = {}
        # all AD4 atom types are capitalized
        for t in set(t.upper() for t in types):
            if t not in ad4_rules:
                raise ValueError('Unsopported atom type: %s' % t)
            atomicnum, flag, wanted = ad4_rules[t]
            # Build both masks separately: `&` binds tighter than `==`, so
            # `a == n & flag` would compare against `n & flag` instead.
            mask = atom_dict['atomicnum'] == atomicnum
            if flag is not None:
                flag_values = atom_dict[flag]
                mask = mask & (flag_values if wanted else ~flag_values)
            out[t] = atom_dict[mask]
        return out
    # Previously an unknown mode silently returned None.
    raise ValueError('Unsupported mode: %s' % mode)

class close_contacts(object):
    """Close contacts descriptor which tallies atoms of type X in certain cutoff from atoms of type Y.

    Parameters
    ----------
        protein: oddt.toolkit.Molecule or None (default=None)
            Default protein to use as reference

        cutoff: int (default=4)
            Cutoff for atoms in Angstroms

        mode: string (default='atomic_nums')
            Method of atoms selection, as used in `atoms_by_type`

        ligand_types: array
            List of ligand atom types to use

        protein_types: array
            List of protein atom types to use. Falls back to
            `ligand_types` when empty or None.

        aligned_pairs: bool (default=False)
            If True, `ligand_types` and `protein_types` are treated as
            aligned pairs (i-th ligand type against i-th protein type).
            If False, every combination of ligand and protein type is counted.
    """
    def __init__(self, protein = None, cutoff = 4, mode = 'atomic_nums', ligand_types = None, protein_types = None, aligned_pairs = False):
        self.cutoff = cutoff
        self.protein = protein
        self.ligand_types = ligand_types
        self.protein_types = protein_types if protein_types else ligand_types
        self.aligned_pairs = aligned_pairs
        self.mode = mode

    def _typed_coords(self, mol, atom_type, cache):
        """Return coordinates of `mol` atoms of `atom_type`, memoised in `cache`."""
        if atom_type not in cache:
            selected = atoms_by_type(mol.atom_dict, [atom_type], self.mode)
            cache[atom_type] = selected[atom_type]['coords']
        return cache[atom_type]

    def build(self, ligands, protein = None, single = False):
        """Builds descriptors for series of ligands

        Parameters
        ----------
            ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
                A list or iterable of ligands to build the descriptor or a single molecule.

            protein: oddt.toolkit.Molecule or None (default=None)
                Protein to use as reference; defaults to the one given
                at construction time.

            single: bool (default=False)
                Flag indicating if the ligand is single.

        Returns
        -------
            out: numpy array of ints, shape=[n_ligands, n_type_pairs]
                Number of protein-ligand atom pairs within the cutoff,
                one row per ligand. Columns follow `ligand_types` order
                (with `protein_types` varying fastest when not aligned).

        Raises
        ------
            ValueError
                If `aligned_pairs` is set and there are fewer protein
                types than ligand types.
        """
        if protein is None:
            protein = self.protein
        if single and not isinstance(ligands, list):
            ligands = [ligands]

        if self.aligned_pairs:
            if len(self.protein_types) < len(self.ligand_types):
                raise ValueError('aligned_pairs requires a protein type for every ligand type')
            type_pairs = list(zip(self.ligand_types, self.protein_types))
        else:
            type_pairs = [(mol_type, prot_type)
                          for mol_type in self.ligand_types
                          for prot_type in self.protein_types]
        desc_size = len(type_pairs)

        # Atom selections are resolved lazily (only when a ligand actually
        # needs them) and memoised: the protein is the same for every ligand,
        # and each ligand type is reused across protein types.
        protein_cache = {}
        rows = []
        for mol in ligands:
            ligand_cache = {}
            rows.append([
                (distance(self._typed_coords(protein, prot_type, protein_cache),
                          self._typed_coords(mol, mol_type, ligand_cache)) <= self.cutoff).sum()
                for mol_type, prot_type in type_pairs
            ])
        if not rows:
            # Keep the result 2-D even when there are no ligands.
            return np.zeros((0, desc_size), dtype=int)
        return np.array(rows, dtype=int)

    def __reduce__(self):
        # The protein is deliberately not pickled; it is restored as None.
        return close_contacts, (None, self.cutoff, self.mode, self.ligand_types, self.protein_types, self.aligned_pairs)
        
class fingerprints(object):
    """Descriptor built from molecular fingerprints.

    Parameters
    ----------
        fp: string (default='fp2')
            Fingerprint name passed to each molecule's `calcfp` method.

        toolkit: string (default='ob')
            Currently ignored: converting molecules to another toolkit
            is not implemented, so `exchange` is always False.
    """
    def __init__(self, fp = 'fp2', toolkit = 'ob'):
        self.fp = fp
        # NOTE: toolkit exchange is not implemented; `target_toolkit` is
        # never set, so `exchange` must stay False.
        self.exchange = False

    def _get_fingerprint(self, mol):
        """Return the raw fingerprint of a single molecule."""
        if self.exchange:
            mol = self.target_toolkit.Molecule(mol)
        return mol.calcfp(self.fp).raw

    def build(self, mols, single = False):
        """Builds fingerprints for series of molecules

        Parameters
        ----------
            mols: iterable of molecules or a single molecule
                Molecules to fingerprint.

            single: bool (default=False)
                Flag indicating if `mols` is a single molecule.

        Returns
        -------
            out: numpy array, shape=[n_mols, fingerprint_length]
                One row per molecule, in input order. An empty 2-D array
                is returned when no molecules are given.
        """
        if single:
            mols = [mols]
        rows = [self._get_fingerprint(mol) for mol in mols]
        if not rows:
            # Fingerprint length is unknown without a molecule.
            return np.zeros((0, 0), dtype=int)
        return np.vstack(rows)

    def __reduce__(self):
        # Preserve the fingerprint name so unpickling does not fall back
        # to the default.
        return fingerprints, (self.fp,)