# Source code for mgkit.snps.conv_func

"""
Wrappers to use some of the general functions of the snps package
in a simpler way.
"""
import functools
import mgkit.snps.funcs
import mgkit.snps.filter
import mgkit.snps.mapper


def get_rank_dataframe(snp_data, taxonomy, min_num=3, rank='order',
                       index_type='taxon', filters=None):
    """
    .. versionadded:: 0.1.11

    .. versionchanged:: 0.2.2
        added *filters* argument

    Returns a :class:`~pandas.DataFrame` with the pN/pS of the given SNPs
    data, mapping all taxa to the specified rank.

    Shortcut for using :func:`~mgkit.snps.funcs.combine_sample_snps`, using
    filters from :func:`~mgkit.snps.filter.get_default_filters` and as
    `taxon_func` parameter :func:`~mgkit.snps.mapper.map_taxon_id_to_rank`.
    *include_higher* is not passed explicitly, so the default of that
    function applies (the original documentation of this wrapper states
    that higher taxa are not included).

    Arguments:
        snp_data (dict): dictionary sample->GeneSyn of SNPs data
        taxonomy: Uniprot Taxonomy
        min_num (int): minimum number of samples in which a valid pN/pS
            is found
        rank (str, None): taxon rank to map. Valid ranks are found in
            :data:`mgkit.taxon.TAXON_RANKS`. If *None*, no taxon mapping
            function is passed to the underlying function
        index_type (str, None): type of index to return
        filters (iterable): list of filters to apply, otherwise uses the
            default filters

    Returns:
        DataFrame: :class:`pandas.DataFrame` of pN/pS values. The index
        type is the one requested with *index_type* ('taxon' by default)
    """
    # Only build the mapping function when a rank was actually requested,
    # mirroring the handling used by get_gene_taxon_dataframe.
    if rank is None:
        taxon_func = None
    else:
        taxon_func = functools.partial(
            mgkit.snps.mapper.map_taxon_id_to_rank,
            taxonomy=taxonomy,
            rank=rank
        )

    # The default filters depend on the taxonomy, so they are created lazily
    if filters is None:
        filters = mgkit.snps.filter.get_default_filters(taxonomy)

    return mgkit.snps.funcs.combine_sample_snps(
        snp_data,
        min_num,
        filters,
        taxon_func=taxon_func,
        gene_func=None,
        index_type=index_type
    )


def get_gene_map_dataframe(snp_data, taxonomy, gene_map, min_num=3,
                           index_type='gene', filters=None):
    """
    .. versionadded:: 0.1.11

    .. versionchanged:: 0.2.2
        added *filters* argument

    Builds a :class:`~pandas.DataFrame` with the pN/pS of the given SNPs
    data, translating the gene IDs through *gene_map*.

    Convenience wrapper around
    :func:`~mgkit.snps.funcs.combine_sample_snps`: the `gene_func` parameter
    is :func:`~mgkit.snps.mapper.map_gene_id` bound to *gene_map*, no
    `taxon_func` is used, and the filters default to the ones returned by
    :func:`~mgkit.snps.filter.get_default_filters`.

    Arguments:
        snp_data (dict): dictionary sample->GeneSyn of SNPs data
        taxonomy: Uniprot Taxonomy
        gene_map (dict): dictionary of mapping for the gene_ids in SNPs
            data
        min_num (int): minimum number of samples in which a valid pN/pS
            is found
        index_type (str, None): type of index to return
        filters (iterable): list of filters to apply, otherwise uses the
            default filters

    Returns:
        DataFrame: :class:`pandas.DataFrame` of pN/pS values. The index
        type is the one requested with *index_type* ('gene' by default)
    """
    # Fall back to the package defaults only when the caller gave no filters
    if filters is None:
        filters = mgkit.snps.filter.get_default_filters(taxonomy)

    # Bind the mapping so the callable only needs the gene ID
    map_genes = functools.partial(
        mgkit.snps.mapper.map_gene_id,
        gene_map=gene_map,
    )

    return mgkit.snps.funcs.combine_sample_snps(
        snp_data,
        min_num,
        filters,
        index_type=index_type,
        gene_func=map_genes,
        taxon_func=None,
    )


def get_full_dataframe(snp_data, taxonomy, min_num=3, index_type=None,
                       filters=None):
    """
    .. versionadded:: 0.1.12

    .. versionchanged:: 0.2.2
        added *filters* argument

    Builds a :class:`~pandas.DataFrame` with the pN/pS of the given SNPs
    data, without mapping either genes or taxa.

    Convenience wrapper around
    :func:`~mgkit.snps.funcs.combine_sample_snps`, called with both
    `taxon_func` and `gene_func` set to *None*; the filters default to the
    ones returned by :func:`~mgkit.snps.filter.get_default_filters`.

    Arguments:
        snp_data (dict): dictionary sample->GeneSyn of SNPs data
        taxonomy: Uniprot Taxonomy
        min_num (int): minimum number of samples in which a valid pN/pS
            is found
        index_type (str, None): type of index to return
        filters (iterable): list of filters to apply, otherwise uses the
            default filters

    Returns:
        DataFrame: :class:`pandas.DataFrame` of pN/pS values. The index
        type is the one requested with *index_type*; the default is None
        (gene-taxon)
    """
    active_filters = filters
    if active_filters is None:
        # No filters supplied: use the defaults built from the taxonomy
        active_filters = mgkit.snps.filter.get_default_filters(taxonomy)

    return mgkit.snps.funcs.combine_sample_snps(
        snp_data,
        min_num,
        active_filters,
        index_type=index_type,
        gene_func=None,
        taxon_func=None,
    )


def get_gene_taxon_dataframe(snp_data, taxonomy, gene_map, min_num=3,
                             rank='genus', index_type=None, filters=None,
                             use_uid=False):
    """
    .. versionadded:: 0.1.12

    .. versionchanged:: 0.2.2
        added *filters* argument

    .. versionchanged:: 0.5.1
        gene_map can be *None*, use_uid can be passed to the underlying
        function

    Returns a :class:`~pandas.DataFrame` with the pN/pS of the given SNPs
    data, mapping the gene IDs through *gene_map* and the taxa to the
    specified rank.

    Shortcut for using :func:`~mgkit.snps.funcs.combine_sample_snps`, using
    filters from :func:`~mgkit.snps.filter.get_default_filters`, as
    `gene_func` parameter :func:`~mgkit.snps.mapper.map_gene_id` and as
    `taxon_func` parameter :func:`~mgkit.snps.mapper.map_taxon_id_to_rank`.

    Arguments:
        snp_data (dict): dictionary sample->GeneSyn of SNPs data
        taxonomy: Uniprot Taxonomy
        gene_map (dict, None): dictionary of mapping for the gene_ids in
            SNPs data. If *None*, no gene mapping function is passed to
            the underlying function
        min_num (int): minimum number of samples in which a valid pN/pS
            is found
        rank (str, None): taxon rank to map. If *None*, no taxon mapping
            function is passed to the underlying function
        index_type (str, None): type of index to return
        filters (iterable): list of filters to apply, otherwise uses the
            default filters
        use_uid (bool): instead of using *gene_id*, uses *uid* as gene ID

    Returns:
        DataFrame: :class:`pandas.DataFrame` of pN/pS values. The index
        type is the one requested with *index_type* (None by default)
    """
    # Each mapping function is optional: build it only when its input was
    # supplied, so that nothing is bound to a None mapping or rank.
    if gene_map is None:
        gene_func = None
    else:
        gene_func = functools.partial(
            mgkit.snps.mapper.map_gene_id,
            gene_map=gene_map
        )

    if rank is None:
        taxon_func = None
    else:
        taxon_func = functools.partial(
            mgkit.snps.mapper.map_taxon_id_to_rank,
            taxonomy=taxonomy,
            rank=rank
        )

    # The default filters depend on the taxonomy, so they are created lazily
    if filters is None:
        filters = mgkit.snps.filter.get_default_filters(taxonomy)

    return mgkit.snps.funcs.combine_sample_snps(
        snp_data,
        min_num,
        filters,
        taxon_func=taxon_func,
        gene_func=gene_func,
        index_type=index_type,
        use_uid=use_uid
    )