Source code for mgkit.db.mongo
"""
.. versionadded:: 0.2.1
This module contains functions and classes to use for a DB like representation
of annotations using the *pymongo* package, a driver to **MongoDB**.
In a MongoDB document, exported from an annotation, using the
:meth:`mgkit.io.gff.Annotation.to_mongodb` method, the keys that are defined
are::
seq_id, source, feat_type, start, end, score, strand,
phase, gene_id, taxon_id, bitscore, exp_nonsyn, exp_syn,
length, dbq, coverage, map
These are defined because they have values that are not strings (defined as
properties in :class:`mgkit.io.gff.Annotation`). The rest of the attributes
defined are kept as well, but no check for the data type is made.
.. note::
lineage is added as a key, whose values are taxon_id, if a function has
been passed to :meth:`mgkit.io.gff.Annotation.to_mongodb`
The exception is the **map** key in the document. It stores both the EC mappings
(EC attribute in the GFF), as well as all mappings whose attribute starts with
*map_*. The former is usually accessed from
:meth:`mgkit.io.gff.Annotation.get_ec` while the latter from
:meth:`mgkit.io.gff.Annotation.get_mapping` or
:meth:`mgkit.io.gff.Annotation.get_mappings`.
These 3 methods return a list and this list is used in the MongoDB document.
The MongoDB document will contain a **map** key where the keys are the type
of mappings, and the values the list of IDs the annotation maps to.
.. list-table:: Example for the map dictionary
:header-rows: 1
* - Type
- GFF
- Annotation
- MongoDB Document
- MongoDB Query
* - EC
- EC
- get_ec
- ec
- map.ec
* - KO
- map_KO
- get_mapping('ko')
- ko
- map.ko
* - eggNOG
- map_EGGNOG
- get_mapping('eggnog')
- eggnog
- map.eggnog
"""
from builtins import object
import logging
from ..io import gff
from pymongo import MongoClient
# Module-level logger, named after this module's import path
LOG = logging.getLogger(__name__)
class GFFDB(object):
    """
    .. versionchanged:: 0.3.4
        added *timeout* parameter

    Wrapper to a MongoDB connection/db. It is used to automate the conversion
    of MongoDB records into :class:`mgkit.io.gff.Annotation` instances.

    Arguments:
        db: name used to select the database from the client
        collection: name used to select the collection from the database
        uri: passed as first argument to :class:`pymongo.MongoClient`
        timeout: passed to :class:`pymongo.MongoClient` as
            *serverSelectionTimeoutMS*, so the value is in milliseconds

    .. note::

        the *db* attribute of an instance holds the selected *collection*
        (`conn[db][collection]`), not the database
    """
    # MongoClient instance, set by __init__
    conn = None
    # Collection used for every query/insert, set by __init__
    db = None

    def __init__(self, db, collection, uri=None, timeout=5):
        self.conn = MongoClient(uri, serverSelectionTimeoutMS=timeout)
        self.db = self.conn[db][collection]

    def cursor(self, query=None):
        "Returns a cursor for the query"
        return self.db.find(query)

    def convert_record(self, record):
        """
        .. versionchanged:: 0.3.1
            removes *lineage* from the attributes

        Converts the record (a dictionary instance) to an Annotation
        """
        return gff.from_mongodb(record, lineage=False)

    def find_annotation(self, query=None):
        """
        Iterate over a cursor created using *query* and yields each record
        after converting it to a :class:`mgkit.io.gff.Annotation` instance,
        using :meth:`mgkit.db.mongo.GFFDB.convert_record`.
        """
        for record in self.cursor(query):
            yield self.convert_record(record)

    def __getitem__(self, uid):
        """
        .. versionadded:: 0.3.1

        Retrieves an annotation from the DB by its *uid*

        Arguments:
            uid: passed unchanged to the collection's *find_one* method

        Returns:
            the matching record, converted with
            :meth:`mgkit.db.mongo.GFFDB.convert_record`

        Raises:
            KeyError: if no record matches *uid*
        """
        record = self.db.find_one(uid)
        # find_one returns None when nothing matches; converting None would
        # fail with an unrelated error, so report the missing key explicitly
        if record is None:
            raise KeyError(uid)
        return self.convert_record(record)

    def insert_one(self, annotation):
        """
        .. versionadded:: 0.3.4

        Inserts an annotation into the DB

        Raises:
            TypeError: if the passed object is not an annotation
        """
        if not isinstance(annotation, gff.Annotation):
            raise TypeError(
                'An mgkit.io.gff.Annotation instance is required: {}'.format(
                    type(annotation)
                )
            )
        self.db.insert_one(annotation.to_mongodb(raw=True))

    def insert_many(self, annotations):
        """
        .. versionadded:: 0.3.4

        Inserts annotations into the DB. The documents are passed to the
        collection as a generator and with *ordered=False*.

        .. warning::

            The object must be a :class:`mgkit.io.gff.Annotation`, no type
            check is made (unlike :meth:`GFFDB.insert_one`)
        """
        self.db.insert_many(
            (
                annotation.to_mongodb(raw=True)
                for annotation in annotations
            ),
            ordered=False
        )

    def __iter__(self):
        """
        .. versionadded:: 0.3.1

        Iterates over all annotations (same as :meth:`GFFDB.values`)
        """
        return self.values()

    def values(self):
        """
        .. versionadded:: 0.3.1

        Iterates over all the annotations in the db/collection
        """
        return self.find_annotation()

    def itervalues(self):
        """
        .. versionadded:: 0.3.1

        Alias for :meth:`GFFDB.values`
        """
        return self.values()

    def items(self):
        """
        .. versionadded:: 0.3.1

        Iterates over all the annotations in the db/collection, yielding a
        tuple (*annotation.uid*, *annotation*)
        """
        for record in self.values():
            yield record.uid, record

    def iteritems(self):
        """
        .. versionadded:: 0.3.1

        Alias for :meth:`GFFDB.items`
        """
        return self.items()

    def keys(self):
        """
        .. versionadded:: 0.3.1

        Iterates over all the *uid* in the db/collection
        """
        for record in self.values():
            yield record.uid