Source code for mgkit.mappings.utils
"""
Utilities to map genes
"""
from builtins import zip
import pandas
import itertools
[docs]def count_genes_in_mapping(gene_lists, labels, mapping, normalise=False):
"""
Maps lists of ids to a mapping dictionary, returning a
:class:`pandas.DataFrame` in which the rows are the labels provided and
the columns the categories to which the ids map. Each element of the matrix
label-category is the sum of all ids in the relative gene list that maps to
the specific category.
:param iterable gene_lists: an iterable in which each element is a iterable
of ids that can be mapped to mapping
:param iterable labels: an iterable of strings that defines the labels to
be used in the resulting rows in the :class:`pandas.DataFrame`; must
have the same length as gene_lists
:param dict mapping: a dictionary in the form:
gene_id->[cat1, cat2, .., catN]
:param bool normalise: if True the counts are normalised over the total for
each row.
:return: a :class:`pandas.DataFrame` instance
"""
categories = set(itertools.chain(*list(mapping.values())))
matrix = pandas.DataFrame(index=labels, columns=categories)
matrix.fillna(0, inplace=True)
for label, gene_list in zip(labels, gene_lists):
for gene_id in gene_list:
try:
mapped_ids = mapping[gene_id]
except KeyError:
continue
for mapped_id in mapped_ids:
matrix.ix[label, mapped_id] += 1
matrix = matrix.ix[:, matrix.sum() > 0]
if normalise:
matrix = matrix.div(matrix.sum(axis=1), axis=0)
return matrix
[docs]def group_annotation_by_mapping(annotations, mapping, attr='ko'):
"""
Group annotations by mapping dictionary
:param iterable annotations: iterable of :class:`gff.GFFKeg` instances
:param dict mapping: dictionary with mappings for the attribute requested
:param str attr: attribute of the annotation to be used as key in mapping
:return dict: dictionary category->annotations
"""
grouped = dict(
(categ, []) for categ in set(itertools.chain(*list(mapping.values())))
)
for annotation in annotations:
try:
categories = mapping[getattr(annotation.attributes, attr)]
except KeyError:
# not included in the categories
continue
for categ in categories:
grouped[categ].append(annotation)
return grouped