Source code for pytadbit.boundary_aligner.aligner

"""
02 Dec 2012


"""
from pytadbit.boundary_aligner.globally     import needleman_wunsch
from pytadbit.boundary_aligner.reciprocally import reciprocal

def consensusize(ali1, ali2, passed):
    """
    Given two alignments returns a consensus alignment. Used for the generation
    of multiple alignments

    :param ali1: first aligned sequence
    :param ali1: second aligned sequence
    :param passed: in case first aligned sequence is already a consensus, it
       might be weighted
    :returns: consensus sequence corresponding to ali1 and ali2
    """
    consensus = []
    for pos in range(len(ali1)):
        if ali1[pos] != ali2[pos]:
            try:
                bound = (ali1[pos] * passed + ali2[pos]) / (1 + passed)
            except TypeError:
                bound = ali1[pos] if not isinstance(ali1[pos],
                                                    str) else ali2[pos]
        else:
            bound = ali1[pos]
        consensus.append(bound)
    return consensus


[docs]def align(sequences, method='reciprocal', **kwargs):
    """
    Align Topologically Associating Domain borders. Supports multiple-alignment
    by building a consensus TAD sequence and aligning each experiment to it.

    .. note::

      as long as we are using multiple alignments in an iterative way,
      the order of sequences will be relevant. Here experiments are sorted
      according to the value of the first boundary found in order to try to
      reduce this problem.

    :param reciprocal method: method used to align
    :returns: the result of the aligner used
    """
    if method == 'global':
        aligner = needleman_wunsch
    elif method == 'reciprocal':
        aligner = reciprocal
    else:
        raise NotImplementedError(('Only "global" and "reciprocal" are ' +
                                   'implemented right now.\n'))
    if len(sequences) > 2:
        dico = {}
        consensus = None
        for j, (i, seq) in enumerate(sorted(enumerate(sequences),
                                            key=lambda x: x[1])):
            consensus = consensus or seq
            dico[j] = {'sort':i,
                       'seq' :seq}
        aligneds = []
        scores = 0
        perc1 = 0
        perc2 = 0
        for other in range(1, len(sequences)):
            try:
                ([align1, align2], score,
                 p1, p2) = aligner(consensus, dico[other]['seq'], **kwargs)
                perc1 += p1
                perc2 += p2
            except ValueError:
                [align1, align2], score = aligner(consensus, dico[other]['seq'],
                                                  **kwargs)
            scores += score
            if len(consensus) != len(align1):
                for pos in range(len(align1)):
                    try:
                        if align1[pos] != '-':
                            continue
                        for ali in aligneds:
                            ali.insert(pos, '-')
                    except IndexError:
                        for ali in aligneds:
                            ali.append('-')
            if not aligneds:
                aligneds.append(align1)
            aligneds.append(align2)
            consensus = consensusize(align1, align2, other)

        sort_alis = [[] for _ in range(len(dico))]
        for seq in range(len(dico)):
            sort_alis[dico[seq]['sort']] = aligneds[seq][:]
        return (sort_alis, scores,
                perc1 / (len(sequences) - 1.),
                perc2 / (len(sequences) - 1.)), consensus
    ([align1, align2], score, p1, p2) = aligner(sequences[0], sequences[1], **kwargs)
    consensus = consensusize(align1, align2, sequences[1])
    return ([align1, align2], score, p1, p2), consensus