Source code for oddt.spatial

"""Spatial functions included in ODDT
Mainly used by other modules, but can be accessed directly.
"""

from math import sin, cos

import numpy as np
from scipy.spatial.distance import cdist
# for Hungarian algorithm, in future use scipy.optimize.linear_sum_assignment (in scipy 0.17+)
try:
    from scipy.optimize import linear_sum_assignment
except ImportError:
    from sklearn.utils.linear_assignment_ import linear_assignment

    def linear_sum_assignment(M):
        out = linear_assignment(M)
        return out[:, 0], out[:, 1]

import oddt
from oddt.utils import is_openbabel_molecule

__all__ = ['angle',
           'angle_2v',
           'dihedral',
           'distance',
           'rmsd',
           'rotate']


[docs]def angle(p1, p2, p3): """Returns an angle from a series of 3 points (point #2 is centroid). Angle is returned in degrees. Parameters ---------- p1,p2,p3 : numpy arrays, shape = [n_points, n_dimensions] Triplets of points in n-dimensional space, aligned in rows. Returns ------- angles : numpy array, shape = [n_points] Series of angles in degrees """ v1 = p1 - p2 v2 = p3 - p2 return angle_2v(v1, v2)
[docs]def angle_2v(v1, v2): """Returns an angle between two vecors.Angle is returned in degrees. Parameters ---------- v1,v2 : numpy arrays, shape = [n_vectors, n_dimensions] Pairs of vectors in n-dimensional space, aligned in rows. Returns ------- angles : numpy array, shape = [n_vectors] Series of angles in degrees """ # better than np.dot(v1, v2), multiple vectors can be applied dot = (v1 * v2).sum(axis=-1) norm = np.linalg.norm(v1, axis=-1) * np.linalg.norm(v2, axis=-1) return np.degrees(np.arccos(np.clip(dot/norm, -1, 1)))
[docs]def dihedral(p1, p2, p3, p4): """Returns an dihedral angle from a series of 4 points. Dihedral is returned in degrees. Function distingishes clockwise and antyclockwise dihedrals. Parameters ---------- p1, p2, p3, p4 : numpy arrays, shape = [n_points, n_dimensions] Quadruplets of points in n-dimensional space, aligned in rows. Returns ------- angles : numpy array, shape = [n_points] Series of angles in degrees """ v12 = (p1 - p2)/np.linalg.norm(p1 - p2) v23 = (p2 - p3)/np.linalg.norm(p2 - p3) v34 = (p3 - p4)/np.linalg.norm(p3 - p4) c1 = np.cross(v12, v23) c2 = np.cross(v23, v34) out = angle_2v(c1, c2) # check clockwise and anticlockwise n1 = c1 / np.linalg.norm(c1) mask = (n1 * v34).sum(axis=-1) > 0 if len(mask.shape) == 0: if mask: out = -out else: out[mask] = -out[mask] return out
[docs]def rmsd(ref, mol, ignore_h=True, method=None, normalize=False): """Computes root mean square deviation (RMSD) between two molecules (including or excluding Hydrogens). No symmetry checks are performed. Parameters ---------- ref : oddt.toolkit.Molecule object Reference molecule for the RMSD calculation mol : oddt.toolkit.Molecule object Query molecule for RMSD calculation ignore_h : bool (default=False) Flag indicating to ignore Hydrogen atoms while performing RMSD calculation. This toggle works only with 'hungarian' method and without sorting (method=None). method : str (default=None) The method to be used for atom asignment between ref and mol. None means that direct matching is applied, which is the default behavior. Available methods: - canonize - match heavy atoms using canonical ordering (it forces ignoring H's) - hungarian - minimize RMSD using Hungarian algorithm - min_symmetry - makes multiple molecule-molecule matches and finds minimal RMSD (the slowest). Hydrogens are ignored. normalize : bool (default=False) Normalize RMSD by square root of rot. bonds Returns ------- rmsd : float RMSD between two molecules """ if method == 'canonize': ref_atoms = ref.coords[ref.canonic_order] mol_atoms = mol.coords[mol.canonic_order] elif method == 'hungarian': mol_map = [] ref_map = [] for a_type in np.unique(mol.atom_dict['atomtype']): if a_type != 'H' or not ignore_h: mol_idx = np.argwhere(mol.atom_dict['atomtype'] == a_type).flatten() ref_idx = np.argwhere(ref.atom_dict['atomtype'] == a_type).flatten() if len(mol_idx) != len(ref_idx): raise ValueError('Unequal number of atoms type: %s' % a_type) if len(mol_idx) == 1: mol_map.append(mol_idx) ref_map.append(ref_idx) continue M = distance(mol.atom_dict['coords'][mol_idx], ref.atom_dict['coords'][ref_idx]) M = M - M.min(axis=0) - M.min(axis=1).reshape(-1, 1) tmp_mol, tmp_ref = linear_sum_assignment(M) mol_map.append(mol_idx[tmp_mol]) ref_map.append(ref_idx[tmp_ref]) mol_atoms = mol.atom_dict['coords'][np.hstack(mol_map)] ref_atoms = ref.atom_dict['coords'][np.hstack(ref_map)] elif method == 'min_symmetry': min_rmsd = None ref_atoms = ref.atom_dict[ref.atom_dict['atomicnum'] != 1]['coords'] mol_atoms = mol.atom_dict[mol.atom_dict['atomicnum'] != 1]['coords'] # safety swith to check if number of heavy atoms match if ref_atoms.shape == mol_atoms.shape: # match mol to ref, generate all matches to find best RMSD matches = oddt.toolkit.Smarts(ref).findall(mol, unique=False) if not matches: raise ValueError('Could not find any match between molecules.') # calculate RMSD between all matches and retain the smallest for match in matches: match = np.array(match, dtype=int) if is_openbabel_molecule(mol): match -= 1 # OB has 1-based indices tmp_dict = mol.atom_dict[match] mol_atoms = tmp_dict[tmp_dict['atomicnum'] != 1]['coords'] # following should not happen, although safety check is left if mol_atoms.shape != ref_atoms.shape: raise Exception('Molecular match got wrong number of atoms.') rmsd = np.sqrt(((mol_atoms - ref_atoms)**2).sum(axis=-1).mean()) if min_rmsd is None or rmsd < min_rmsd: min_rmsd = rmsd return min_rmsd elif ignore_h: mol_atoms = mol.coords[mol.atom_dict['atomicnum'] != 1] ref_atoms = ref.coords[ref.atom_dict['atomicnum'] != 1] else: mol_atoms = mol.coords ref_atoms = ref.coords if mol_atoms.shape == ref_atoms.shape: rmsd = np.sqrt(((mol_atoms - ref_atoms)**2).sum(axis=-1).mean()) if normalize: rmsd /= np.sqrt(mol.num_rotors) return rmsd # at this point raise an exception raise ValueError('Unequal number of atoms in molecules (%i and %i)' % (len(mol_atoms), len(ref_atoms)))
[docs]def distance(x, y): """Computes distance between each pair of points from x and y. Parameters ---------- x : numpy arrays, shape = [n_x, 3] Array of poinds in 3D y : numpy arrays, shape = [n_y, 3] Array of poinds in 3D Returns ------- dist_matrix : numpy arrays, shape = [n_x, n_y] Distance matrix """ return cdist(x, y)
def distance_complex(x, y): """ Computes distance between points, similar to distance(cdist), with major difference - allows higher dimmentions of input (cdist supports 2). distance is purely float64 and can de slightly more precise. Parameters ---------- x : numpy arrays, shape = [..., 3] Array of poinds in 3D y : numpy arrays, shape = [..., 3] Array of poinds in 3D Returns ------- dist_matrix : numpy arrays Distance matrix """ return np.linalg.norm(x[..., np.newaxis, :] - y, axis=-1)
[docs]def rotate(coords, alpha, beta, gamma): """Rotate coords by cerain angle in X, Y, Z. Angles are specified in radians. Parameters ---------- coords : numpy arrays, shape = [n_points, 3] Coordinates in 3-dimensional space. alpha, beta, gamma: float Angles to rotate the coordinates along X, Y and Z axis. Angles are specified in radians. Returns ------- new_coords : numpy arrays, shape = [n_points, 3] Rorated coordinates in 3-dimensional space. """ centroid = coords.mean(axis=0) coords = coords - centroid sin_alpha = sin(alpha) cos_alpha = cos(alpha) sin_beta = sin(beta) cos_beta = cos(beta) sin_gamma = sin(gamma) cos_gamma = cos(gamma) rot_matrix = np.array([[cos_beta * cos_gamma, sin_alpha * sin_beta * cos_gamma - cos_alpha * sin_gamma, cos_alpha * sin_beta * cos_gamma + sin_alpha * sin_gamma], [cos_beta * sin_gamma, sin_alpha * sin_beta * sin_gamma + cos_alpha * cos_gamma, cos_alpha * sin_beta * sin_gamma - sin_alpha * cos_gamma], [-sin_beta, sin_alpha * cos_beta, cos_alpha * cos_beta]]) return (coords[:, np.newaxis, :] * rot_matrix).sum(axis=2) + centroid