Source code for pytadbit.boundary_aligner.reciprocally
"""
02 Dec 2012
Aligner based on reciprocal closest hits for Topologically Associated Domains
"""
from __future__ import print_function
def find_closest(num, tads1, start=0):
closest = 0
diff = inf = float('inf')
for n in tads1:
if n < start: continue
if abs(n - num) < diff:
diff = abs(n - num)
closest = n
elif diff != inf:
break
return closest
def find_closest_reciprocal(t1, tads1, tads2, start=0):
"""
Function to check the needleman_wunsch algorithm.
"""
closest = None
diff = inf = float('inf')
gap = 0
for t2 in tads2:
if t2 <= start: continue
if abs(t2 - t1) < diff:
t1prim = find_closest(t2, tads1, start=t1)
if t1 == t1prim:
if diff != inf:
gap += 1
diff = abs(t2 - t1)
closest = t2
elif diff != inf:
break
if diff == inf:
return '-', 0
return closest, gap
[docs]def reciprocal(tads1, tads2, penalty=None, verbose=False, max_dist=100000):
"""
Method based on reciprocal closest boundaries (bd). bd1 will be aligned
with bd2 (closest boundary from bd1) if and only if bd1 is the closest
boundary of bd2 too (and of course if the distance between bd1 and bd2 is
lower than max_dist).
:param tads1: list of boundaries
:param tads2: list of boundaries
:param None penalty: if None, penalty will be two times max_dist
:param verbose: print alignment
:param 100000 max_dist: distance threshold from which two boundaries can
not be aligned together
:returns: the alignment and a score between 0 and 1 (0: bad, 1: good).
"""
if not penalty:
# set penalty to the average length of a TAD
penalty = 2 * max_dist
start = 0
diffs = []
align1 = []
align2 = []
adj = 0
for t in tads1:
closest, gap = find_closest_reciprocal(t, tads1, tads2,
start=start)
diff = penalty
while gap > 0:
try:
align2.append(tads2[adj])
diffs.append(diff)
except IndexError:
break
align1.append('-')
adj += 1
gap -= 1
if closest != '-':
start = closest
diff = abs(t - closest)
if diff > max_dist:
# print 'MAAAAX: ', t, closest, diff, max_dist
if t > closest:
align2.append(closest)
adj += 1
align1.append('-')
closest = '-'
else:
align1.append(t)
align2.append('-')
t = '-'
diff = penalty
diffs.append(diff)
#print 't2i {}; start {}; t1 {}; clos {}; gap {}'.format (
# tads2[i], start, t, closest, gap)
align1.append(t)
align2.append(closest)
if closest != '-':
adj += 1
if verbose:
print('\n Alignment:')
print('TADS 1: '+'|'.join(['%6s' % (str(int(x/1000)) if x!='-' else '-'*3) \
for x in align1]))
print('TADS 2: '+'|'.join(['%6s' % (str(int(x/1000)) if x!='-' else '-'*3) \
for x in align2]))
diff = len(align1) - len(diffs)
perc1 = 1 - float(diff) / len(tads1)
perc2 = 1 - float(diff) / len(tads2)
score = 1 - (float(penalty * diff + sum(diffs))) / len(align1) / penalty
return [align1, align2], score, perc1, perc2