"""
Utility functions
"""
from __future__ import division
from builtins import range
import functools
import warnings
def average_length(a1s, a1e, a2s, a2e):
    """
    Return the mean length of two closed coordinate ranges, a1 and a2.

    Both ends of a range are counted, so the length of each one is
    ``end - start + 1``.

    :param int a1s: a1 leftmost number
    :param int a1e: a1 rightmost number
    :param int a2s: a2 leftmost number
    :param int a2e: a2 rightmost number

    :return float: the average length
    """
    first_span = a1e - a1s
    second_span = a2e - a2s
    # the "+ 2" accounts for the end position of each of the two ranges
    return (first_span + second_span + 2) / 2.0
def between(pos, start, end):
    """
    Check whether a number falls inside a closed range.

    :param int pos: number to test
    :param int start: leftmost number
    :param int end: rightmost number

    :return bool: False if *pos* is smaller than *start* or larger than
        *end*, True otherwise (both ends are included)
    """
    outside = pos < start or pos > end
    return not outside
def union_range(start1, end1, start2, end2):
    """
    .. versionadded:: 0.1.12

    .. versionchanged:: 0.3.1
        changed behaviour, since the intervals are meant to be closed

    If two numeric ranges overlap (including one being contained in the
    other) or are adjacent (one starts exactly one position after the other
    ends), it returns the range that spans both, otherwise None is returned.
    Works on both int and float numbers, even mixed.

    Arguments:
        start1 (numeric): start of range 1
        end1 (numeric): end of range 1
        start2 (numeric): start of range 2
        end2 (numeric): end of range 2

    Returns:
        (tuple or None): union of the ranges or None if the ranges don't
        overlap

    Example:
        >>> union_range(10, 13, 1, 10)
        (1, 13)
        >>> union_range(1, 10, 11, 13)
        (1, 13)
        >>> union_range(5, 6, 1, 10)
        (1, 10)
        >>> union_range(1, 10, 12, 13) is None
        True
    """
    overlapping = (
        # one of the ends of range 2 falls inside range 1
        start1 <= start2 <= end1 or
        start1 <= end2 <= end1 or
        # range 2 fully contains range 1: none of its ends is inside range
        # 1, so an end of range 1 must be tested against range 2 instead
        start2 <= start1 <= end2
    )
    # closed ranges that touch, like (1, 10) and (11, 13), are merged as well
    if overlapping or end1 + 1 == start2 or end2 + 1 == start1:
        return min(start1, start2), max(end1, end2)
    return None
def union_ranges(intervals):
    """
    .. versionadded:: 0.3.1

    From a list of ranges, assumed to be closed, performs a union of all
    elements. The ranges are sorted first and each one is merged into the
    previous one whenever :func:`union_range` returns a range for the pair.

    Arguments:
        intervals (intervals): iterable where each element is a closed range
            (tuple)

    Returns:
        list: the list of ranges that are the union of all elements passed,
        an empty list if no range was passed

    Examples:
        >>> union_ranges([(1, 2), (3, 7), (6, 12), (9, 17), (18, 20)])
        [(1, 20)]
        >>> union_ranges([(1, 2), (3, 7), (6, 12), (9, 14), (18, 20)])
        [(1, 14), (18, 20)]
        >>> union_ranges([])
        []
    """
    ordered = sorted(intervals)
    if not ordered:
        # nothing to merge, avoids an IndexError on the first element
        return []

    union = [ordered[0]]
    for start2, end2 in ordered[1:]:
        # only the last merged range can still grow, since input is sorted
        start1, end1 = union[-1]
        new_range = union_range(start1, end1, start2, end2)
        if new_range is None:
            union.append((start2, end2))
        else:
            union[-1] = new_range
    return union
def complement_ranges(intervals, end=None):
    """
    .. versionadded:: 0.3.1

    Perform a complement operation of the list of intervals, i.e. returning the
    ranges (tuples) that are not included in the list of intervals.
    :func:`union_ranges` is first called on the intervals. Positions are
    counted starting from 1.

    .. note::

        the `end` parameter is there for cases where the ranges passed don't
        cover the whole space. Assuming a list of ranges from annotations on a
        nucleotidic sequence, if the last range doesn't include the last
        position of the sequence, passing end equal to the length of the
        sequence will make the function include a last range that includes it

    Arguments:
        intervals (intervals): iterable where each element is a closed range
            (tuple)
        end (int): if the end of the complement intervals is supposed to be
            outside the last range.

    Returns:
        list: the list of intervals that complement the ones passed. If no
        interval is passed, the result is *[(1, end)]* when `end` is given
        (and at least 1), an empty list otherwise.

    Examples:
        >>> complement_ranges([(1, 10), (11, 20), (25, 30)], end=100)
        [(21, 24), (31, 100)]
        >>> complement_ranges([(1, 10), (11, 20), (25, 30)])
        [(21, 24)]
        >>> complement_ranges([(0, 2), (3, 17), (18, 20)])
        []
        >>> complement_ranges([(0, 2), (3, 17), (18, 20)], end=100)
        [(21, 100)]
        >>> complement_ranges([(1, 10), (12, 20)], end=21)
        [(11, 11), (21, 21)]
    """
    intervals = list(intervals)
    if not intervals:
        # nothing is covered: the complement is the whole space, which has
        # a known size only if `end` was passed
        if end is not None and end >= 1:
            return [(1, end)]
        return []

    intervals = union_ranges(intervals)
    comp_intervals = []

    # gap between the first position (1) and the first range
    first_start = intervals[0][0]
    if first_start > 1:
        comp_intervals.append((1, first_start - 1))

    # gaps between each pair of consecutive ranges
    for previous, following in zip(intervals, intervals[1:]):
        new_start = previous[1] + 1
        new_end = following[0] - 1
        # ranges are closed, so a gap of a single position
        # (new_start == new_end) is a valid range and must be kept
        if new_start <= new_end:
            comp_intervals.append((new_start, new_end))

    # gap between the last range and the end of the space, if requested
    if end is not None:
        start = intervals[-1][1] + 1
        # same as above, `end` itself may be the only position left out
        if end >= start:
            comp_intervals.append((start, end))

    return comp_intervals
def ranges_length(ranges):
    """
    .. versionadded:: 0.1.12

    Given an iterable where each element is a range, a tuple whose elements
    are numbers with the first being less than or equal to the second, the
    function sums the lengths of all ranges. Ranges are closed, so the length
    of each one is *end - start + 1*.

    .. warning::

        it's supposed to be used on intervals that were first passed to
        functions like :func:`union_ranges`. If values overlap, the sum
        will be wrong

    Arguments:
        ranges (iterable): each element is a tuple like *(1, 10)*

    Returns:
        int: sum of all ranges lengths
    """
    # the loop variable is not called `range`, to avoid shadowing the builtin
    return sum(interval[1] - interval[0] + 1 for interval in ranges)
def range_substract(start1, end1, start2, end2):
    """
    Removes from the first range the portion that it shares with the second
    one, as calculated by :func:`range_intersect`.

    Arguments:
        start1 (int): start position for the first range
        end1 (int): end position for the first range
        start2 (int): start position for the second range
        end2 (int): end position for the second range

    Returns:
        list: the portions of the first range, as *(start, end)* tuples, that
        are left. It contains the whole first range if the intersection is
        empty, otherwise the portion before the intersection and the one
        after it, when they exist (an empty list if neither does).
    """
    overlap = range_intersect(start1, end1, start2, end2)
    if overlap is None:
        return [(start1, end1)]

    overlap_start, overlap_end = overlap
    leftovers = []
    # portion of the first range on the left of the intersection
    if start1 != overlap_start:
        leftovers.append((start1, overlap_start - 1))
    # portion of the first range on the right of the intersection
    if end1 != overlap_end:
        leftovers.append((overlap_end + 1, end1))
    return leftovers
def range_intersect(start1, end1, start2, end2):
    """
    .. versionadded:: 0.1.13

    Calculates the intersection of two ranges, each passed as its *start*
    and *end* positions.

    Arguments:
        start1 (int): start position for the first range
        end1 (int): end position for the first range
        start2 (int): start position for the second range
        end2 (int): end position for the second range

    Returns:
        (None, tuple): a tuple with the start and end position of the
        portion shared by the two ranges, or *None* if they share nothing
    """
    # the ranges intersect if any end of one of them is inside the other
    endpoint_checks = (
        (start2, start1, end1),
        (end2, start1, end1),
        (start1, start2, end2),
        (end1, start2, end2),
    )
    if not any(between(pos, low, high) for pos, low, high in endpoint_checks):
        return None
    return max(start1, start2), min(end1, end2)
def apply_func_window(func, data, window, step=0):
    """
    Generator that applies a function to consecutive slices (windows) of
    the data passed.

    Arguments:
        func (callable): called with each slice of `data`
        data: object that supports `len` and slicing
        window (int): size of each slice, the last ones can be shorter
        step (int): distance between the start of two consecutive slices,
            if 0 (default) it is set to `window`

    Yields:
        the value returned by `func` for each slice
    """
    stride = window if step == 0 else step
    for offset in range(0, len(data), stride):
        chunk = data[offset:offset + window]
        yield func(chunk)
def deprecated(func):
    '''
    Decorator used to flag a function as deprecated: each call to the
    decorated function emits a `DeprecationWarning`, whose message includes
    the function name and docstring, before the function is executed.

    from https://wiki.python.org/moin/PythonDecoratorLibrary
    '''
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        message = "Call to deprecated function {}.\n{}".format(
            func.__name__,
            func.__doc__
        )
        code = func.__code__
        # the warning is reported at the location of the deprecated function
        warnings.warn_explicit(
            message,
            category=DeprecationWarning,
            filename=code.co_filename,
            lineno=code.co_firstlineno + 1
        )
        return func(*args, **kwargs)
    return wrapper