""" Datasets wrapped in convenient models """
import csv
import six
from os.path import isfile
from oddt import toolkit
def _csv_file_filter(f):
    """Yield the rows of a text file with comments dropped and spaces merged.

    The file is read in binary mode and every line is decoded as UTF-8,
    silently discarding bytes that cannot be decoded.  Lines whose first
    character is ``#`` are skipped.  Every other line is yielded with each
    run of whitespace collapsed to a single space and leading/trailing
    whitespace removed, so blank lines are yielded as empty strings.

    Parameters
    ----------
    f : str
        Path of the file to read.

    Yields
    ------
    str
        One normalised line per non-comment line of the file.
    """
    # The context manager closes the handle once the generator is exhausted
    # or closed, instead of leaving that to the garbage collector.
    with open(f, 'rb') as handle:
        for raw_row in handle:
            row = raw_row.decode('utf-8', 'ignore')
            # startswith() is safe on an empty string, which indexing is not;
            # a line can decode to '' when it holds only undecodable bytes.
            if row.startswith('#'):
                continue
            yield ' '.join(row.split())
class pdbbind(object):
    """Access a local copy of the PDBbind dataset through its index files.

    On construction the index file of every known set is parsed (when it
    exists) and, for each entry that has a ``<home>/<id>/<id>_pocket.pdb``
    file, the entry id (first column) and the value of the fourth column
    converted to ``float`` are recorded.

    Parameters
    ----------
    home : str
        Root directory of the dataset.
    version : int or str, optional
        Dataset version (presumably the release year); anything accepted
        by ``int()``.  It selects the set names and the index file layout:

        * 2007: sets ``core``, ``refined``, ``general``; index at
          ``<home>/INDEX.<version>.<set>.data``
        * 2016: sets ``core``, ``refined``, ``general_PL``; index at
          ``<home>/index/INDEX_<set>_data.<version>``
        * other: sets ``core``, ``refined``, ``general_PL``; index at
          ``<home>/INDEX_<set>_data.<version>``

        When ``None`` (or 0) no index is read and no set is available, so
        ``ids`` and ``activities`` raise ``KeyError``.
    default_set : str, optional
        Set used by ``ids``, ``activities``, iteration and indexing.
        Defaults to ``'general'`` for version 2007, else ``'general_PL'``.
    data_file : str, optional
        Explicit index file; when given it is used for every set instead
        of the version-derived path.
    opt : dict, optional
        Options stored on the instance and handed to every entry object.

    Raises
    ------
    Exception
        If a version is given but no index file could be found.
    """

    def __init__(self,
                 home,
                 version=None,
                 default_set=None,
                 data_file=None,
                 opt=None):
        # int(None) raises TypeError, which made the default unusable;
        # only coerce when a version was actually supplied.
        if version is not None:
            version = int(version)
        self.home = home
        if default_set:
            self.default_set = default_set
        elif version == 2007:
            self.default_set = 'general'
        else:
            self.default_set = 'general_PL'
        self.opt = opt or {}
        self.sets = {}      # set name -> {pdbid: activity}
        self._set_ids = {}  # set name -> ids in index-file order
        self._set_act = {}  # set name -> activities, parallel to _set_ids

        if not version:
            # Without a version there is no index to read, hence no
            # metadata (listing the directory is not implemented).
            return

        general_set = 'general' if version == 2007 else 'general_PL'
        for pdbind_set in ('core', 'refined', general_set):
            csv_file = self._index_file(pdbind_set, version, data_file)
            if not isfile(csv_file):
                continue
            ids = []
            activities = []
            for row in csv.reader(_csv_file_filter(csv_file), delimiter=' '):
                if not row:
                    # Blank lines come out of csv.reader as empty rows.
                    continue
                pdbid = row[0]
                # Entries without a pocket file on disk are ignored.
                if not isfile('%s/%s/%s_pocket.pdb' % (self.home, pdbid, pdbid)):
                    continue
                ids.append(pdbid)
                activities.append(float(row[3]))
            self._set_ids[pdbind_set] = ids
            self._set_act[pdbind_set] = activities
            self.sets[pdbind_set] = dict(zip(ids, activities))

        if not self.sets:
            raise Exception('There is no PDBbind set availabe')

    def _index_file(self, pdbind_set, version, data_file):
        """Return the index file path for one set of the given version."""
        if data_file:
            return data_file
        if version == 2007:
            return '%s/INDEX.%i.%s.data' % (self.home, version, pdbind_set)
        if version == 2016:
            return '%s/index/INDEX_%s_data.%i' % (self.home, pdbind_set, version)
        return '%s/INDEX_%s_data.%i' % (self.home, pdbind_set, version)

    @property
    def ids(self):
        """Entry ids of the default set, in index-file order."""
        return self._set_ids[self.default_set]

    @property
    def activities(self):
        """Fourth-column values of the default set, parallel to ``ids``."""
        return self._set_act[self.default_set]

    def __iter__(self):
        """Yield an entry object for every id of the default set."""
        for pdbid in self.ids:
            yield _pdbbind_id(self.home, pdbid, opt=self.opt)

    def __getitem__(self, pdbid):
        """Return the entry for an id or for an integer position.

        A value found among ``ids`` is used as the entry id.  Otherwise an
        integer is treated as a position in ``ids`` (``IndexError`` when
        out of range).  Anything else returns ``None``.
        """
        if pdbid in self.ids:
            return _pdbbind_id(self.home, pdbid, opt=self.opt)
        if isinstance(pdbid, int):
            return _pdbbind_id(self.home, self.ids[pdbid], opt=self.opt)
        return None
class _pdbbind_id(object):
    """One dataset entry whose structure files are read on attribute access.

    Files are looked up as ``<home>/<id>/<id>_<part>``; every property reads
    the file again on each access and returns ``None`` when the file is
    missing.

    Parameters
    ----------
    home : str
        Root directory of the dataset.
    pdbid : str
        Entry id; used both as sub-directory name and as file name prefix.
    opt : dict, optional
        Passed as ``opt`` to ``toolkit.readfile``.
    """

    def __init__(self, home, pdbid, opt=None):
        self.home = home
        self.id = pdbid
        self.opt = opt or {}

    def _read_first(self, fmt, suffix):
        """Return the first molecule of ``<id>_<suffix>`` or ``None``."""
        path = '%s/%s/%s_%s' % (self.home, self.id, self.id, suffix)
        if not isfile(path):
            return None
        reader = toolkit.readfile(fmt, path, lazy=True, opt=self.opt)
        # A default keeps StopIteration from escaping the property when the
        # reader yields nothing; an empty file is reported like a missing one.
        return next(reader, None)

    @property
    def protein(self):
        """First molecule of ``<id>_protein.pdb``, or ``None``."""
        return self._read_first('pdb', 'protein.pdb')

    @property
    def pocket(self):
        """First molecule of ``<id>_pocket.pdb``, or ``None``."""
        return self._read_first('pdb', 'pocket.pdb')

    @property
    def ligand(self):
        """First molecule of ``<id>_ligand.sdf``, or ``None``."""
        return self._read_first('sdf', 'ligand.sdf')