# Source code for mgkit.db.dbm
"""
.. versionadded:: 0.2.1
This module contains functions and classes that provide a dbm-like
representation of annotations, using the *semidbm* package
"""
import logging
from builtins import object, bytes
from ..io import gff
import semidbm
LOG = logging.getLogger(__name__)
def create_gff_dbm(annotations, file_name):
    """
    .. versionadded:: 0.2.1

    Creates a semidbm database, using an annotation `uid` as key and the gff
    line as value. The object is synced before being returned.

    .. note::

        A GFF line is used instead of a json representation because it was
        more compact when semidbm was tested.

    Arguments:
        annotations (iterable): iterable of annotations; each one is expected
            to expose a `uid` attribute and a `to_gff()` method
        file_name (str): database file name, opened with the `c` flag.

    Returns:
        object: a semidbm database object, still open

    Raises:
        Exception: any error raised while storing the annotations or syncing
            is propagated unchanged, after the database has been closed.
    """
    database = semidbm.open(file_name, 'c')
    LOG.info('DB "%s" opened/created', file_name)
    try:
        for annotation in annotations:
            # both key and value are stored as ASCII-encoded bytes
            database[annotation.uid.encode('ascii')] = \
                annotation.to_gff().encode('ascii')
        database.sync()
    except BaseException:
        # the caller never receives the handle on failure, so close it here
        # instead of leaking it, then let the original error propagate
        database.close()
        raise
    return database
class GFFDB(object):
    """
    .. versionadded:: 0.2.1

    A wrapper for a semidbm instance, used to convert the GFF line stored in
    the DB into an :class:`mgkit.io.gff.Annotation` instance. If a string is
    passed to the init method, a DB will be opened with the `c` flag.

    The object behaves like a dictionary, wrapping the access to annotations
    using a *uid* as key and converting the line into an
    :class:`mgkit.io.gff.Annotation` instance.

    Keys may be passed either as text or as `bytes`; text keys are encoded as
    ASCII before touching the underlying DB. Iteration yields decoded (text)
    uids.
    """
    # underlying DB object; stays None when no database was supplied
    db = None

    def __init__(self, db=None):
        """
        Arguments:
            db (str or object): a file name, which is opened with
                `semidbm.open` using the `c` flag, or any other object, which
                is stored as is and used as the underlying DB
        """
        if isinstance(db, str):
            self.db = semidbm.open(db, 'c')
        else:
            self.db = db

    @staticmethod
    def _encode_key(key):
        """
        Returns *key* as bytes: `bytes` keys are returned unchanged, any other
        key is encoded as ASCII
        """
        if isinstance(key, bytes):
            return key
        return key.encode('ascii')

    def __setitem__(self, uid, annotation):
        """
        Stores the GFF line of *annotation* (ASCII encoded) under *uid*
        """
        # accept the same key types as __getitem__
        self.db[self._encode_key(uid)] = annotation.to_gff().encode('ascii')

    def __getitem__(self, key):
        """
        Returns the value stored under *key*, decoded from ASCII and parsed
        with :func:`mgkit.io.gff.from_gff`
        """
        return gff.from_gff(self.db[self._encode_key(key)].decode('ascii'))

    def __del__(self):
        # `db` is None when the instance was created without a database (or
        # opening it failed), in which case there is nothing to close
        if self.db is not None:
            self.db.close()

    def __iter__(self):
        """
        Iterates over the uids in the DB, decoded from ASCII
        """
        for uid in self.db:
            yield uid.decode('ascii')

    def items(self):
        """
        Yields `(uid, annotation)` tuples for every uid in the DB
        """
        for uid in self:
            yield uid, self[uid]

    def iteritems(self):
        """
        Alias of :meth:`items`
        """
        return self.items()

    def values(self):
        """
        Yields the annotation of every uid in the DB
        """
        for uid in self:
            yield self[uid]

    def itervalues(self):
        """
        Alias of :meth:`values`
        """
        return self.values()