Source code for mgkit.mappings.pandas_map

"""
Module that contains mapping operations on pandas data structures
"""
from __future__ import division
from future.utils import viewitems
from builtins import zip
import pandas


def group_dataframe_by_mapping(dataframe, mapping, root_taxon, name_dict=None):
    """
    Build a :class:`pandas.DataFrame` with one row per category, holding the
    per-column mean of the genes mapped to that category for one root taxon.

    For each category the rows of its genes are selected from *dataframe*,
    narrowed to *root_taxon* on the ``root`` index level, and every column
    is averaged over the remaining rows.

    :param DataFrame dataframe: DataFrame with a gene-root MultiIndex (an
        index level named ``root`` is required)
    :param dict mapping: dictionary of category->genes
    :param str root_taxon: root taxon used to select the genes
    :param dict name_dict: optional dictionary of category->name; when
        given, the names are used as row labels instead of the categories
        (a category missing from it raises :exc:`KeyError`)

    :return DataFrame: one row per category, same columns as *dataframe*
    """
    averaged_rows = {}

    for category, gene_ids in mapping.items():
        # Resolve the row label first, so a missing name fails before any
        # selection is attempted on the DataFrame.
        if name_dict is None:
            label = category
        else:
            label = name_dict[category]

        category_genes = dataframe.loc[gene_ids]
        genes_in_root = category_genes.xs(root_taxon, level='root')
        averaged_rows[label] = genes_in_root.mean()

    return pandas.DataFrame.from_dict(averaged_rows, orient='index')
def calc_coefficient_of_variation(dataframe):
    r"""
    Compute the row-wise coefficient of variation of a DataFrame, scaled
    by a sample-size factor.

    Uses formula from `Wikipedia
    <http://en.wikipedia.org/wiki/Coefficient_of_variation>`_

    The value returned for each row is
    :math:`\left (1 + \frac {1}{4n} \right ) * c_{v}` where
    :math:`c_{v} = \frac {s}{\bar{x}}`, with :math:`s` the row standard
    deviation, :math:`\bar{x}` the row mean and :math:`n` the row count
    as returned by ``dataframe.count(axis=1)``.

    :param DataFrame dataframe: table whose rows are summarised

    :return: the scaled coefficient of variation, one value per row
    """
    row_std = dataframe.std(axis=1)
    row_mean = dataframe.mean(axis=1)
    row_count = dataframe.count(axis=1)

    sample_size_factor = 1 + (1.0 / (4 * row_count))
    return sample_size_factor * (row_std / row_mean)
def make_stat_table(dataframes, roots):
    """
    Summarise several DataFrames in a single :class:`pandas.DataFrame`.

    For every (DataFrame, root) pair three row-wise statistics are
    computed: mean, standard deviation and coefficient of variation. They
    are stored under the ``(root, 'mean')``, ``(root, 'stdev')`` and
    ``(root, 'c. var')`` columns. The columns of the result are a sorted
    MultiIndex with levels named ``root`` and ``value``.

    :param iterable dataframes: iterable of DataFrame instances
    :param iterable roots: list of root taxa to which each table belongs

    :return DataFrame: returns a DataFrame instance
    """
    column_keys = []
    stats_by_key = {}

    for table, root in zip(dataframes, roots):
        table_stats = (
            ('mean', table.mean(axis=1)),
            ('stdev', table.std(axis=1)),
            ('c. var', calc_coefficient_of_variation(table)),
        )
        for label, values in table_stats:
            key = (root, label)
            column_keys.append(key)
            stats_by_key[key] = values

    columns = pandas.MultiIndex.from_tuples(
        sorted(column_keys),
        names=('root', 'value')
    )
    return pandas.DataFrame(stats_by_key, columns=columns)
def concatenate_and_rename_tables(dataframes, roots):
    """
    Join a list of :class:`pandas.DataFrame` instances side by side, after
    prepending to every column name of each table the prefix found at the
    same position in *roots*.

    :param iterable dataframes: iterable of DataFrame instances
    :param iterable roots: list of prefixes to prepend to the column names
        of each DataFrame

    :return DataFrame: returns a DataFrame instance

    .. todo::

        * move to pandas_utils?

    """
    prefixed_tables = [
        table.rename(
            columns={name: prefix + name for name in table.columns}
        )
        for table, prefix in zip(dataframes, roots)
    ]
    # axis=1 places the tables next to each other, aligned on their index
    return pandas.concat(prefixed_tables, axis=1)