"""
Wrappers to use some of the general functions of the snps package
in a simpler way.
"""
import functools
import mgkit.snps.funcs
import mgkit.snps.filter
import mgkit.snps.mapper
def get_rank_dataframe(snp_data, taxonomy, min_num=3, rank='order',
                       index_type='taxon', filters=None):
    """
    .. versionadded:: 0.1.11

    .. versionchanged:: 0.2.2
        added *filters* argument

    Builds a :class:`~pandas.DataFrame` of pN/pS values from the given SNPs
    data, with every taxon mapped to the requested rank.

    This is a shortcut for :func:`~mgkit.snps.funcs.combine_sample_snps`:
    the `taxon_func` parameter is set to
    :func:`~mgkit.snps.mapper.map_taxon_id_to_rank` (bound to *taxonomy* and
    *rank*), no `gene_func` is used, and the filters default to the ones
    returned by :func:`~mgkit.snps.filter.get_default_filters`.

    .. note::

        Earlier documentation stated that higher taxa are excluded
        (*include_higher* set to False). This wrapper does not pass
        *include_higher* explicitly, so the default of
        :func:`~mgkit.snps.mapper.map_taxon_id_to_rank` applies; check that
        function if the distinction matters.

    Arguments:
        snp_data (dict): dictionary sample->GeneSyn of SNPs data
        taxonomy: Uniprot Taxonomy
        min_num (int): minimum number of samples in which a valid pN/pS
            is found
        rank (str, None): taxon rank to map. Valid ranks are found in
            :data:`mgkit.taxon.TAXON_RANKS`. If None, no taxon mapping
            function is passed on
        index_type (str, None): type of index to return, defaults to
            'taxon'
        filters (iterable): list of filters to apply, otherwise uses the
            default filters

    Returns:
        DataFrame: :class:`pandas.DataFrame` of pN/pS values, indexed
        according to *index_type*
    """
    if filters is None:
        filters = mgkit.snps.filter.get_default_filters(taxonomy)

    rank_mapper = functools.partial(
        mgkit.snps.mapper.map_taxon_id_to_rank,
        taxonomy=taxonomy,
        rank=rank
    )
    # Without a rank there is nothing to map to, so no mapper is passed on.
    if rank is None:
        rank_mapper = None

    return mgkit.snps.funcs.combine_sample_snps(
        snp_data,
        min_num,
        filters,
        taxon_func=rank_mapper,
        gene_func=None,
        index_type=index_type
    )
def get_gene_map_dataframe(snp_data, taxonomy, gene_map, min_num=3,
                           index_type='gene', filters=None):
    """
    .. versionadded:: 0.1.11

    .. versionchanged:: 0.2.2
        added *filters* argument

    Builds a :class:`~pandas.DataFrame` of pN/pS values from the given SNPs
    data, with the gene IDs remapped through *gene_map*.

    This is a shortcut for :func:`~mgkit.snps.funcs.combine_sample_snps`:
    the `gene_func` parameter is set to
    :func:`~mgkit.snps.mapper.map_gene_id` (bound to *gene_map*), no
    `taxon_func` is used, and the filters default to the ones returned by
    :func:`~mgkit.snps.filter.get_default_filters`.

    Arguments:
        snp_data (dict): dictionary sample->GeneSyn of SNPs data
        taxonomy: Uniprot Taxonomy, only used to build the default filters
        gene_map (dict): dictionary of mappings for the gene_ids in the
            SNPs data
        min_num (int): minimum number of samples in which a valid pN/pS
            is found
        index_type (str, None): type of index to return, defaults to
            'gene'
        filters (iterable): list of filters to apply, otherwise uses the
            default filters

    Returns:
        DataFrame: :class:`pandas.DataFrame` of pN/pS values, indexed
        according to *index_type*
    """
    if filters is None:
        filters = mgkit.snps.filter.get_default_filters(taxonomy)

    # Only the gene side is remapped; taxa are left untouched.
    mapping_options = dict(
        taxon_func=None,
        gene_func=functools.partial(
            mgkit.snps.mapper.map_gene_id,
            gene_map=gene_map
        ),
        index_type=index_type
    )

    return mgkit.snps.funcs.combine_sample_snps(
        snp_data, min_num, filters, **mapping_options
    )
def get_full_dataframe(snp_data, taxonomy, min_num=3, index_type=None,
                       filters=None):
    """
    .. versionadded:: 0.1.12

    .. versionchanged:: 0.2.2
        added *filters* argument

    Builds a :class:`~pandas.DataFrame` of pN/pS values from the given SNPs
    data, without remapping genes or taxa.

    This is a shortcut for :func:`~mgkit.snps.funcs.combine_sample_snps`
    with both `taxon_func` and `gene_func` set to None; the filters default
    to the ones returned by
    :func:`~mgkit.snps.filter.get_default_filters`.

    Arguments:
        snp_data (dict): dictionary sample->GeneSyn of SNPs data
        taxonomy: Uniprot Taxonomy, only used to build the default filters
        min_num (int): minimum number of samples in which a valid pN/pS
            is found
        index_type (str, None): type of index to return, defaults to None
            (gene-taxon)
        filters (iterable): list of filters to apply, otherwise uses the
            default filters

    Returns:
        DataFrame: :class:`pandas.DataFrame` of pN/pS values, indexed
        according to *index_type*
    """
    # Fall back to the package defaults when the caller gives no filters.
    if filters is None:
        filters = mgkit.snps.filter.get_default_filters(taxonomy)

    return mgkit.snps.funcs.combine_sample_snps(
        snp_data,
        min_num,
        filters,
        taxon_func=None,
        gene_func=None,
        index_type=index_type
    )
def get_gene_taxon_dataframe(snp_data, taxonomy, gene_map, min_num=3,
                             rank='genus', index_type=None, filters=None,
                             use_uid=False):
    """
    .. versionadded:: 0.1.12

    .. versionchanged:: 0.2.2
        added *filters* argument

    .. versionchanged:: 0.5.1
        gene_map can be *None*, use_uid can be passed to the underlying
        function

    Builds a :class:`~pandas.DataFrame` of pN/pS values from the given SNPs
    data, optionally remapping both the gene IDs (through *gene_map*) and
    the taxa (to *rank*).

    This is a shortcut for :func:`~mgkit.snps.funcs.combine_sample_snps`:

    * `gene_func` is :func:`~mgkit.snps.mapper.map_gene_id` bound to
      *gene_map*, or None if *gene_map* is None
    * `taxon_func` is :func:`~mgkit.snps.mapper.map_taxon_id_to_rank` bound
      to *taxonomy* and *rank*, or None if *rank* is None
    * the filters default to the ones returned by
      :func:`~mgkit.snps.filter.get_default_filters`

    Arguments:
        snp_data (dict): dictionary sample->GeneSyn of SNPs data
        taxonomy: Uniprot Taxonomy
        gene_map (dict, None): dictionary of mappings for the gene_ids in
            the SNPs data, if None the gene IDs are not remapped
        min_num (int): minimum number of samples in which a valid pN/pS
            is found
        rank (str, None): taxon rank to map, defaults to 'genus'. If None
            the taxa are not remapped
        index_type (str, None): type of index to return, defaults to None
        filters (iterable): list of filters to apply, otherwise uses the
            default filters
        use_uid (bool): instead of using *gene_id*, uses *uid* as gene ID

    Returns:
        DataFrame: :class:`pandas.DataFrame` of pN/pS values, indexed
        according to *index_type*
    """
    gene_mapper = functools.partial(
        mgkit.snps.mapper.map_gene_id,
        gene_map=gene_map
    )
    # A missing gene map means the gene IDs are used as they are.
    if gene_map is None:
        gene_mapper = None

    # Likewise, taxa are only remapped when a rank was requested.
    rank_mapper = None
    if rank is not None:
        rank_mapper = functools.partial(
            mgkit.snps.mapper.map_taxon_id_to_rank,
            taxonomy=taxonomy,
            rank=rank
        )

    if filters is None:
        filters = mgkit.snps.filter.get_default_filters(taxonomy)

    return mgkit.snps.funcs.combine_sample_snps(
        snp_data,
        min_num,
        filters,
        taxon_func=rank_mapper,
        gene_func=gene_mapper,
        index_type=index_type,
        use_uid=use_uid
    )