Source code for oddt.toolkits.extras.rdkit

from __future__ import absolute_import, print_function
from math import isnan, isinf
from itertools import combinations

import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem

METALS = (3, 4, 11, 12, 13, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
          37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 55, 56, 57,
          58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
          75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92, 93, 94,
          95, 96, 97, 98, 99, 100, 101, 102, 103)


[docs]def PathFromAtomList(mol, amap): out = [] for i, j in combinations(amap, 2): bond = mol.GetBondBetweenAtoms(i, j) if bond: out.append(bond.GetIdx()) return out
[docs]def AtomListToSubMol(mol, amap, includeConformer=False): """ Parameters ---------- mol: rdkit.Chem.rdchem.Mol Molecule amap: array-like List of atom indices (zero-based) includeConformer: bool (default=True) Toogle to include atoms coordinates in submolecule. Returns ------- submol: rdkit.Chem.rdchem.RWMol Submol determined by specified atom list """ if not isinstance(amap, list): amap = list(amap) submol = Chem.RWMol(Chem.Mol()) for aix in amap: submol.AddAtom(mol.GetAtomWithIdx(aix)) for i, j in combinations(amap, 2): bond = mol.GetBondBetweenAtoms(i, j) if bond: submol.AddBond(amap.index(i), amap.index(j), bond.GetBondType()) if includeConformer: for conf in mol.GetConformers(): new_conf = Chem.Conformer(len(amap)) for i in range(len(amap)): new_conf.SetAtomPosition(i, conf.GetAtomPosition(amap[i])) new_conf.SetId(conf.GetId()) new_conf.Set3D(conf.Is3D()) submol.AddConformer(new_conf) return submol
[docs]def MolFromPDBBlock(molBlock, sanitize=True, removeHs=True, flavor=0): # before 2019.03 pre-sanitization is required pre_sanitize = False if sanitize and rdkit.__version__ <= '2018.09': pre_sanitize = True mol = Chem.MolFromPDBBlock(molBlock, sanitize=pre_sanitize, removeHs=removeHs, flavor=flavor) if mol is None: return None # Adjust connectivity for atom in mol.GetAtoms(): res = atom.GetPDBResidueInfo() if res is None: continue res_name = res.GetResidueName() atom_name = res.GetName().strip() # Fix missing double bonds in RDKit - double bonds if atom_name == 'O' and not res.GetIsHeteroAtom() and atom.GetDegree() == 1: atom.SetNoImplicit(True) atom.GetBonds()[0].SetBondType(Chem.BondType.DOUBLE) # Double bonds in sidechains if res_name in ['HID', 'HIE', 'HIP']: if atom_name == 'CD2': for bond in atom.GetBonds(): if bond.GetOtherAtom(atom).GetPDBResidueInfo().GetName().strip() == 'CG': bond.SetBondType(Chem.BondType.DOUBLE) break if res_name == 'HID': if atom_name == 'CE1': for bond in atom.GetBonds(): if bond.GetOtherAtom(atom).GetPDBResidueInfo().GetName().strip() == 'ND1': bond.SetBondType(Chem.BondType.DOUBLE) break elif res_name in ['HIE', 'HIP']: if atom_name == 'CE1': for bond in atom.GetBonds(): if bond.GetOtherAtom(atom).GetPDBResidueInfo().GetName().strip() == 'NE2': bond.SetBondType(Chem.BondType.DOUBLE) break mol.UpdatePropertyCache(strict=sanitize) # Set metal coordination (zero order) bond orders to single to prevent adding Hs if rdkit.__version__ >= '2018.03': for bond in mol.GetBonds(): if bond.GetBondType() == Chem.BondType.ZERO: a1 = bond.GetBeginAtom() a2 = bond.GetEndAtom() # single bonds only if there are enough electrons if ((a1.GetAtomicNum() in METALS and a2.GetNumImplicitHs() + a2.GetNumExplicitHs() > 0) or (a2.GetAtomicNum() in METALS and a1.GetNumImplicitHs() + a1.GetNumExplicitHs() > 0)): bond.SetBondType(Chem.BondType.SINGLE) if sanitize: result = Chem.SanitizeMol(mol) if result != 0: return None # Debug # for atom in mol.GetAtoms(): # res = atom.GetPDBResidueInfo() # if res is None: # continue # res_name = res.GetResidueName() # atom_name = res.GetName().strip() # if atom_name in ['NE2', 'ND1'] and res_name in ['HID', 'HIE', 'HIS']: # print(res_name, # atom_name, # atom.GetDegree(), # atom.GetTotalValence(), # atom.GetNumExplicitHs(), # atom.GetNumImplicitHs(), # sum(n.GetAtomicNum() == 1 for n in atom.GetNeighbors()), # sep='\t') return mol
# Mol2 Atom typing def _sybyl_atom_type(atom): """ Asign sybyl atom type Reference #1: http://www.tripos.com/mol2/atom_types.html Reference #2: http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf """ sybyl = None atom_symbol = atom.GetSymbol() atomic_num = atom.GetAtomicNum() hyb = atom.GetHybridization()-1 # -1 since 1 = sp, 2 = sp1 etc hyb = min(hyb, 3) degree = atom.GetDegree() aromtic = atom.GetIsAromatic() # define groups for atom types guanidine = '[NX3,NX2]([!O,!S])!@C(!@[NX3,NX2]([!O,!S]))!@[NX3,NX2]([!O,!S])' # strict # guanidine = '[NX3]([!O])([!O])!:C!:[NX3]([!O])([!O])' # corina compatible # guanidine = '[NX3]!@C(!@[NX3])!@[NX3,NX2]' # guanidine = '[NX3]C([NX3])=[NX2]' # guanidine = '[NX3H1,NX2,NX3H2]C(=[NH1])[NH2]' # previous # if atomic_num == 6: if aromtic: sybyl = 'C.ar' elif degree == 3 and _atom_matches_smarts(atom, guanidine): sybyl = 'C.cat' else: sybyl = '%s.%i' % (atom_symbol, hyb) elif atomic_num == 7: if aromtic: sybyl = 'N.ar' elif _atom_matches_smarts(atom, 'C(=[O,S])-N'): sybyl = 'N.am' elif degree == 3 and _atom_matches_smarts(atom, '[$(N!-*),$([NX3H1]-*!-*)]'): sybyl = 'N.pl3' elif _atom_matches_smarts(atom, guanidine): # guanidine has N.pl3 sybyl = 'N.pl3' elif degree == 4 or hyb == 3 and atom.GetFormalCharge(): sybyl = 'N.4' else: sybyl = '%s.%i' % (atom_symbol, hyb) elif atomic_num == 8: # http://www.daylight.com/dayhtml_tutorials/languages/smarts/smarts_examples.html if degree == 1 and _atom_matches_smarts(atom, '[CX3](=O)[OX1H0-]'): sybyl = 'O.co2' elif degree == 2 and not aromtic: # Aromatic Os are sp2 sybyl = 'O.3' else: sybyl = 'O.2' elif atomic_num == 16: # http://www.daylight.com/dayhtml_tutorials/languages/smarts/smarts_examples.html if degree == 3 and _atom_matches_smarts(atom, '[$([#16X3]=[OX1]),$([#16X3+][OX1-])]'): sybyl = 'S.O' # https://github.com/rdkit/rdkit/blob/master/Data/FragmentDescriptors.csv elif _atom_matches_smarts(atom, 'S(=,-[OX1;+0,-1])(=,-[OX1;+0,-1])(-[#6])-[#6]'): sybyl = 'S.o2' else: sybyl = '%s.%i' % (atom_symbol, hyb) elif atomic_num == 15 and hyb == 3: sybyl = '%s.%i' % (atom_symbol, hyb) if not sybyl: sybyl = atom_symbol return sybyl def _atom_matches_smarts(atom, smarts): idx = atom.GetIdx() patt = Chem.MolFromSmarts(smarts) for m in atom.GetOwningMol().GetSubstructMatches(patt): if idx in m: return True return False def _amide_bond(bond): a1 = bond.GetBeginAtom() a2 = bond.GetEndAtom() if (a1.GetAtomicNum() == 6 and a2.GetAtomicNum() == 7 or a2.GetAtomicNum() == 6 and a1.GetAtomicNum() == 7): # https://github.com/rdkit/rdkit/blob/master/Data/FragmentDescriptors.csv patt = Chem.MolFromSmarts('C(=O)-N') for m in bond.GetOwningMol().GetSubstructMatches(patt): if a1.GetIdx() in m and a2.GetIdx() in m: return True return False
[docs]def MolFromPDBQTBlock(block, sanitize=True, removeHs=True): """Read PDBQT block to a RDKit Molecule Parameters ---------- block: string Residue name which explicitly pint to a ligand(s). sanitize: bool (default=True) Should the sanitization be performed removeHs: bool (default=True) Should hydrogens be removed when reading molecule. Returns ------- mol: rdkit.Chem.rdchem.Mol Molecule read from PDBQT """ pdb_lines = [] name = '' data = {} for line in block.split('\n'): # Get all know data from REMARK section if line[:12] == 'REMARK Name': name = line[15:].strip() elif line[:18] == 'REMARK VINA RESULT': tmp = line[19:].split() data['vina_affinity'] = tmp[0] data['vina_rmsd_lb'] = tmp[1] data['vina_rmsd_ub'] = tmp[2] # no more data to collect if line[:4] != 'ATOM': continue pdb_line = line[:56] pdb_line += '1.00 0.00 ' # Do proper atom type lookup atom_type = line[71:].split()[1] if atom_type == 'A': atom_type = 'C' elif atom_type[:1] == 'O': atom_type = 'O' elif atom_type[:1] == 'H': atom_type = 'H' if removeHs: continue elif atom_type == 'NA': atom_type = 'N' pdb_lines.append(pdb_line + atom_type) mol = Chem.MolFromPDBBlock('\n'.join(pdb_lines), sanitize=False) if sanitize: Chem.SanitizeMol(mol) else: Chem.GetSSSR(mol) # reorder atoms using serial new_order = sorted(range(mol.GetNumAtoms()), key=lambda i: (mol.GetAtomWithIdx(i) .GetPDBResidueInfo() .GetSerialNumber())) mol = Chem.RenumberAtoms(mol, new_order) # properties must be set on final copy of Mol, RenumberAtoms purges data mol.SetProp('_Name', name) for k, v in data.items(): mol.SetProp(str(k), str(v)) return mol
[docs]def PDBQTAtomLines(mol, donors, acceptors): """Create a list with PDBQT atom lines for each atom in molecule. Donors and acceptors are given as a list of atom indices. """ atom_lines = [line.replace('HETATM', 'ATOM ') for line in Chem.MolToPDBBlock(mol).split('\n') if line.startswith('HETATM') or line.startswith('ATOM')] pdbqt_lines = [] for idx, atom in enumerate(mol.GetAtoms()): pdbqt_line = atom_lines[idx][:56] pdbqt_line += '0.00 0.00 ' # append empty vdW and ele # Get charge charge = 0. fields = ['_MMFF94Charge', '_GasteigerCharge', '_TriposPartialCharge'] for f in fields: if atom.HasProp(f): charge = atom.GetDoubleProp(f) break # FIXME: this should not happen, blame RDKit if isnan(charge) or isinf(charge): charge = 0. pdbqt_line += ('%.3f' % charge).rjust(6) # Get atom type pdbqt_line += ' ' atomicnum = atom.GetAtomicNum() if atomicnum == 6 and atom.GetIsAromatic(): pdbqt_line += 'A' elif atomicnum == 7 and idx in acceptors: pdbqt_line += 'NA' elif atomicnum == 8 and idx in acceptors: pdbqt_line += 'OA' elif atomicnum == 1 and atom.GetNeighbors()[0].GetIdx() in donors: pdbqt_line += 'HD' else: pdbqt_line += atom.GetSymbol() pdbqt_lines.append(pdbqt_line) return pdbqt_lines
[docs]def MolToPDBQTBlock(mol, flexible=True, addHs=False, computeCharges=False): """Write RDKit Molecule to a PDBQT block Parameters ---------- mol: rdkit.Chem.rdchem.Mol Molecule with a protein ligand complex flexible: bool (default=True) Should the molecule encode torsions. Ligands should be flexible, proteins in turn can be rigid. addHs: bool (default=False) The PDBQT format requires at least polar Hs on donors. By default Hs are added. computeCharges: bool (default=False) Should the partial charges be automatically computed. If the Hs are added the charges must and will be recomputed. If there are no partial charge information, they are set to 0.0. Returns ------- block: str String wit PDBQT encoded molecule """ # make a copy of molecule mol = Chem.Mol(mol) # if flexible molecule contains multiple fragments write them separately if flexible and len(Chem.GetMolFrags(mol)) > 1: return ''.join(MolToPDBQTBlock(frag, flexible=flexible, addHs=addHs, computeCharges=computeCharges) for frag in Chem.GetMolFrags(mol, asMols=True)) # Identify donors and acceptors for atom typing # Acceptors patt = Chem.MolFromSmarts('[$([O;H1;v2]),' '$([O;H0;v2;!$(O=N-*),' '$([O;-;!$(*-N=O)]),' '$([o;+0])]),' '$([n;+0;!X3;!$([n;H1](cc)cc),' '$([$([N;H0]#[C&v4])]),' '$([N&v3;H0;$(Nc)])]),' '$([F;$(F-[#6]);!$(FC[F,Cl,Br,I])])]') acceptors = list(map(lambda x: x[0], mol.GetSubstructMatches(patt, maxMatches=mol.GetNumAtoms()))) # Donors patt = Chem.MolFromSmarts('[$([N&!H0&v3,N&!H0&+1&v4,n&H1&+0,$([$([Nv3](-C)(-C)-C)]),' '$([$(n[n;H1]),' '$(nc[n;H1])])]),' # Guanidine can be tautormeic - e.g. Arginine '$([NX3,NX2]([!O,!S])!@C(!@[NX3,NX2]([!O,!S]))!@[NX3,NX2]([!O,!S])),' '$([O,S;H1;+0])]') donors = list(map(lambda x: x[0], mol.GetSubstructMatches(patt, maxMatches=mol.GetNumAtoms()))) if addHs: mol = Chem.AddHs(mol, addCoords=True, onlyOnAtoms=donors, ) if addHs or computeCharges: AllChem.ComputeGasteigerCharges(mol) atom_lines = PDBQTAtomLines(mol, donors, acceptors) assert len(atom_lines) == mol.GetNumAtoms() pdbqt_lines = [] # vina scores if (mol.HasProp('vina_affinity') and mol.HasProp('vina_rmsd_lb') and mol.HasProp('vina_rmsd_lb')): pdbqt_lines.append('REMARK VINA RESULT: ' + ('%.1f' % float(mol.GetProp('vina_affinity'))).rjust(8) + ('%.3f' % float(mol.GetProp('vina_rmsd_lb'))).rjust(11) + ('%.3f' % float(mol.GetProp('vina_rmsd_ub'))).rjust(11)) pdbqt_lines.append('REMARK Name = ' + (mol.GetProp('_Name') if mol.HasProp('_Name') else '')) if flexible: # Find rotatable bonds rot_bond = Chem.MolFromSmarts('[!$(*#*)&!D1&!$(C(F)(F)F)&' '!$(C(Cl)(Cl)Cl)&' '!$(C(Br)(Br)Br)&' '!$(C([CH3])([CH3])[CH3])&' '!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&' '!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&' '!$([CD3](=[N+])-!@[#7!D1])&' '!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&' '!D1&!$(C(F)(F)F)&' '!$(C(Cl)(Cl)Cl)&' '!$(C(Br)(Br)Br)&' '!$(C([CH3])([CH3])[CH3])]') bond_atoms = list(mol.GetSubstructMatches(rot_bond)) num_torsions = len(bond_atoms) # Active torsions header pdbqt_lines.append('REMARK %i active torsions:' % num_torsions) pdbqt_lines.append('REMARK status: (\'A\' for Active; \'I\' for Inactive)') for i, (a1, a2) in enumerate(bond_atoms): pdbqt_lines.append('REMARK%5.0i A between atoms: _%i and _%i' % (i + 1, a1 + 1, a2 + 1)) # Fragment molecule on bonds to ge rigid fragments bond_ids = [mol.GetBondBetweenAtoms(a1, a2).GetIdx() for a1, a2 in bond_atoms] if bond_ids: mol_rigid_frags = Chem.FragmentOnBonds(mol, bond_ids, addDummies=False) else: mol_rigid_frags = mol frags = list(Chem.GetMolFrags(mol_rigid_frags)) def weigh_frags(frag): """sort by the fragment size and the number of bonds (secondary)""" num_bonds = 0 # bond_weight = 0 for a1, a2 in bond_atoms: if a1 in frag or a2 in frag: num_bonds += 1 # for frag2 in frags: # if a1 in frag2 or a2 in frag2: # bond_weight += len(frag2) # changed signs are fixing mixed sorting type (ascending/descending) return -len(frag), -num_bonds, # bond_weight frags = sorted(frags, key=weigh_frags) # Start writting the lines with ROOT pdbqt_lines.append('ROOT') frag = frags.pop(0) for idx in frag: pdbqt_lines.append(atom_lines[idx]) pdbqt_lines.append('ENDROOT') # Now build the tree of torsions usign DFS algorithm. Keep track of last # route with following variables to move down the tree and close branches branch_queue = [] current_root = frag old_roots = [frag] visited_frags = [] visited_bonds = [] while len(frags) > len(visited_frags): end_branch = True for frag_num, frag in enumerate(frags): for bond_num, (a1, a2) in enumerate(bond_atoms): if (frag_num not in visited_frags and bond_num not in visited_bonds and (a1 in current_root and a2 in frag or a2 in current_root and a1 in frag)): # direction of bonds is important if a1 in current_root: bond_dir = '%i %i' % (a1 + 1, a2 + 1) else: bond_dir = '%i %i' % (a2 + 1, a1 + 1) pdbqt_lines.append('BRANCH %s' % bond_dir) for idx in frag: pdbqt_lines.append(atom_lines[idx]) branch_queue.append('ENDBRANCH %s' % bond_dir) # Overwrite current root and stash previous one in queue old_roots.append(current_root) current_root = frag # remove used elements from stack visited_frags.append(frag_num) visited_bonds.append(bond_num) # mark that we dont want to end branch yet end_branch = False break else: continue break # break the outer loop as well if end_branch: pdbqt_lines.append(branch_queue.pop()) if old_roots: current_root = old_roots.pop() # close opened branches if any is open while len(branch_queue): pdbqt_lines.append(branch_queue.pop()) pdbqt_lines.append('TORSDOF %i' % num_torsions) else: pdbqt_lines.extend(atom_lines) return '\n'.join(pdbqt_lines)