Source code for pytadbit.boundary_aligner.reciprocally

"""
02 Dec 2012

Aligner based on reciprocal closest hits for Topologically Associated Domains
"""
from __future__ import print_function

def find_closest(num, tads1, start=0):
    closest = 0
    diff = inf = float('inf')
    for n in tads1:
        if n < start: continue
        if abs(n - num) < diff:
            diff = abs(n - num)
            closest = n
        elif diff != inf:
            break
    return closest


def find_closest_reciprocal(t1, tads1, tads2, start=0):
    """
    Function to check the needleman_wunsch algorithm.
    """
    closest = None
    diff = inf = float('inf')
    gap = 0
    for t2 in tads2:
        if t2 <= start: continue
        if abs(t2 - t1) < diff:
            t1prim = find_closest(t2, tads1, start=t1)
            if t1 == t1prim:
                if diff != inf:
                    gap += 1
                diff = abs(t2 - t1)
                closest = t2
        elif diff != inf:
            break
    if diff == inf:
        return '-', 0
    return closest, gap


[docs]def reciprocal(tads1, tads2, penalty=None, verbose=False, max_dist=100000): """ Method based on reciprocal closest boundaries (bd). bd1 will be aligned with bd2 (closest boundary from bd1) if and only if bd1 is the closest boundary of bd2 too (and of course if the distance between bd1 and bd2 is lower than max_dist). :param tads1: list of boundaries :param tads2: list of boundaries :param None penalty: if None, penalty will be two times max_dist :param verbose: print alignment :param 100000 max_dist: distance threshold from which two boundaries can not be aligned together :returns: the alignment and a score between 0 and 1 (0: bad, 1: good). """ if not penalty: # set penalty to the average length of a TAD penalty = 2 * max_dist start = 0 diffs = [] align1 = [] align2 = [] adj = 0 for t in tads1: closest, gap = find_closest_reciprocal(t, tads1, tads2, start=start) diff = penalty while gap > 0: try: align2.append(tads2[adj]) diffs.append(diff) except IndexError: break align1.append('-') adj += 1 gap -= 1 if closest != '-': start = closest diff = abs(t - closest) if diff > max_dist: # print 'MAAAAX: ', t, closest, diff, max_dist if t > closest: align2.append(closest) adj += 1 align1.append('-') closest = '-' else: align1.append(t) align2.append('-') t = '-' diff = penalty diffs.append(diff) #print 't2i {}; start {}; t1 {}; clos {}; gap {}'.format ( # tads2[i], start, t, closest, gap) align1.append(t) align2.append(closest) if closest != '-': adj += 1 if verbose: print('\n Alignment:') print('TADS 1: '+'|'.join(['%6s' % (str(int(x/1000)) if x!='-' else '-'*3) \ for x in align1])) print('TADS 2: '+'|'.join(['%6s' % (str(int(x/1000)) if x!='-' else '-'*3) \ for x in align2])) diff = len(align1) - len(diffs) perc1 = 1 - float(diff) / len(tads1) perc2 = 1 - float(diff) / len(tads2) score = 1 - (float(penalty * diff + sum(diffs))) / len(align1) / penalty return [align1, align2], score, perc1, perc2