Source code for mgkit.snps.filter
"""
SNPs filtering functions
"""
import functools
from builtins import filter
from .. import consts
from ..filter.taxon import filter_taxon_by_id_list
from ..filter.common import FilterFails
def filter_genesyn_by_taxon_id(gene_syn, taxonomy=None, filter_list=None,
                               exclude=False, func=None):
    """
    Checks if the `taxon_id` attribute of `gene_syn` is in the `filter_list`.
    Exclude reverses the result. If func is supplied, it's used to traverse
    the `taxonomy`.

    The actual test is delegated to
    :func:`~mgkit.filter.taxon.filter_taxon_by_id_list`.

    Arguments:
        gene_syn: :class:`~mgkit.snps.GeneSyn` instance
        taxonomy: a valid taxonomy (instance of
            :class:`~mgkit.taxon.Taxonomy`), only required if `func` is
            supplied
        filter_list (iterable): list of taxon IDs to include/exclude
        exclude (bool): if the filter is reversed
        func (func): function used to traverse the taxonomy, such as
            :func:`~mgkit.taxon.is_ancestor`. It is called with `taxonomy`
            bound as its first argument. If None, no traversal function is
            passed on.

    Returns:
        bool: the value returned by
        :func:`~mgkit.filter.taxon.filter_taxon_by_id_list` for the
        `taxon_id` of `gene_syn`, with the result reversed if `exclude` is
        True.

    Raises:
        FilterFails: if filter_list is None or taxonomy is None and func is not
            None
    """
    # The two failure modes are reported separately, so the message tells the
    # caller which argument is actually missing.
    if filter_list is None:
        raise FilterFails('No filter_list supplied')

    if func is not None:
        if taxonomy is None:
            raise FilterFails('No taxonomy supplied')
        # Bind the taxonomy so the delegate only has to pass taxon IDs.
        func = functools.partial(func, taxonomy)

    return filter_taxon_by_id_list(
        gene_syn.taxon_id,
        filter_list=filter_list,
        exclude=exclude,
        func=func
    )
def filter_genesyn_by_gene_id(gene_syn, gene_ids=None, exclude=False,
                              id_func=None):
    """
    Checks if the gene ID of `gene_syn` is listed in `gene_ids`.

    Arguments:
        gene_syn: :class:`~mgkit.snps.GeneSyn` instance
        gene_ids (iterable): list of gene IDs to include/exclude
        exclude (bool): if the filter is reversed
        id_func (func): function that accepts `gene_syn` and returns the ID
            to look up in `gene_ids`. If None, the `gene_id` attribute of
            `gene_syn` is used.

    Returns:
        bool: if `exclude` is False, True only if the ID appears in
        `gene_ids`; if `exclude` is True, True only if the ID is NOT in
        `gene_ids`.

    Raises:
        FilterFails: if gene_ids is None
    """
    if gene_ids is None:
        raise FilterFails('No gene_ids supplied')

    # id_func defaults to None, which used to be called unconditionally and
    # raised a TypeError; fall back to the plain attribute instead.
    if id_func is None:
        gene_id = gene_syn.gene_id
    else:
        gene_id = id_func(gene_syn)

    return (gene_id in gene_ids) ^ exclude
def filter_genesyn_by_coverage(gene_syn, min_cov=None):
    """
    Tests the `coverage` attribute of `gene_syn` against a minimum threshold.

    Arguments:
        gene_syn: :class:`~mgkit.snps.GeneSyn` instance
        min_cov (int): lowest coverage accepted; a coverage equal to
            `min_cov` passes the filter

    Returns:
        bool: True if the coverage is greater than or equal to `min_cov`

    Raises:
        FilterFails: if min_cov is None
    """
    if min_cov is not None:
        coverage = gene_syn.coverage
        return coverage >= min_cov

    # Without a threshold there is nothing to compare against.
    raise FilterFails('No coverage supplied')
def get_default_filters(taxonomy, **kwargs):
    """
    Builds the list of filters that are used by default.

    The options are read from a copy of
    :data:`mgkit.consts.DEFAULT_SNP_FILTER`, updated with any keyword
    argument passed, so the defaults themselves are not modified.

    Arguments:
        taxonomy: taxonomy passed to the taxon filter
        **kwargs: values overriding the default options; the keys read are
            `min_cov`, `include_only` and `func`

    Returns:
        list: two callables, each accepting a `gene_syn`: first the coverage
        filter (:func:`filter_genesyn_by_coverage`), then the taxon filter
        (:func:`filter_genesyn_by_taxon_id`) set to include only the
        configured taxa.
    """
    options = consts.DEFAULT_SNP_FILTER.copy()
    options.update(kwargs)

    by_coverage = functools.partial(
        filter_genesyn_by_coverage,
        min_cov=options['min_cov']
    )

    # exclude=False: the taxon list is used as an inclusion list.
    taxon_options = dict(
        taxonomy=taxonomy,
        filter_list=options['include_only'],
        exclude=False,
        func=options['func']
    )
    by_taxon = functools.partial(filter_genesyn_by_taxon_id, **taxon_options)

    return [by_coverage, by_taxon]
def pipe_filters(iterable, *funcs):
    """
    Chains the filters in `funcs` over `iterable`, in the order they are
    passed, using `builtins.filter`, and yields the values that pass all of
    them.

    Arguments:
        iterable: the values to filter
        *funcs: filter functions, each one is applied to the output of the
            previous one

    Yields:
        the values of `iterable` that passed every filter; if no filter is
        passed, all values are yielded
    """
    # Fold the filters left to right, each one wrapping the previous stage.
    piped = functools.reduce(
        lambda stage, predicate: filter(predicate, stage),
        funcs,
        iterable
    )
    for item in piped:
        yield item