Source code for pytadbit.boundary_aligner.globally

"""
02 Dec 2012

global aligner for Topologically Associated Domains
"""
from __future__ import print_function
from math import log


[docs]def needleman_wunsch(tads1, tads2, penalty=-6., ext_pen=-5.6, max_dist=500000, verbose=False): """ Align two lists of TAD boundaries using a Needleman-Wunsh implementation :param tads1: list of boundaries for one chromosome under one condition :param tads2: list of boundaries for the same chromosome under other conditions :param -0.1 penalty: penalty to open a gap in the alignment of boundaries :param 500000 max_dist: distance from which match are denied. A bin_size of 20Kb the number of bins corresponding to 0.5Mb is 25 :param False verbose: print the Needleman-Wunsch score matrix, and the alignment of boundaries :returns: the max score in the Needleman-Wunsch score matrix. """ ############## tads1 = [0.0] + tads1 tads2 = [0.0] + tads2 l_tads1 = len(tads1) l_tads2 = len(tads2) max_dist = log(1. / (abs(max_dist) + 1)) dister = lambda x, y: log(1. / (abs(x - y) + 1)) scores = _virgin_score(penalty, l_tads1, l_tads2) pen = penalty for i in range(1, l_tads1): for j in range(1, l_tads2): d_dist = dister(tads2[j], tads1[i]) match = d_dist + scores[i-1][j-1] insert = scores[i-1][j] + pen delete = scores[i][j-1] + pen if d_dist < max_dist: scores[i][j] = max((insert, delete)) pen = ext_pen else: if match >= max(insert, delete): pen = ext_pen else: pen = ext_pen scores[i][j] = max((match, insert, delete)) align1 = [] align2 = [] i = l_tads1 -1 j = l_tads2 -1 max_score = float('-inf') while i and j: score = scores[i][j] if score > max_score: max_score = score d_dist = dister(tads2[j], tads1[i]) value = scores[i-1][j-1] + d_dist if _equal(score, value): align1.insert(0, tads1[i]) align2.insert(0, tads2[j]) i -= 1 j -= 1 elif _equal(score, scores[i-1][j] + penalty): align1.insert(0, tads1[i]) align2.insert(0, '-') i -= 1 elif _equal(score, scores[i-1][j] + ext_pen): align1.insert(0, tads1[i]) align2.insert(0, '-') i -= 1 elif _equal(score, scores[i][j-1] + penalty): align1.insert(0, '-') align2.insert(0, tads2[j]) j -= 1 elif _equal(score, scores[i][j-1] + ext_pen): align1.insert(0, '-') align2.insert(0, tads2[j]) j -= 1 else: for scr in scores: print(' '.join(['%6s' % (round(y, 2)) for y in scr])) raise Exception('Something is failing and it is my fault...', i, j, tads1[i], tads2[j]) while i: align1.insert(0, tads1[i]) align2.insert(0, '-') i -= 1 while j: align1.insert(0, '-') align2.insert(0, tads2[j]) j -= 1 if verbose: print('\n Alignment:') print('TADS 1: '+'|'.join(['%9s' % (str(int(x)) if x!='-' else '-'*3) \ for x in align1])) print('TADS 2: '+'|'.join(['%9s' % (str(int(x)) if x!='-' else '-'*3) \ for x in align2])) return [align1, align2], max_score
def _virgin_score(penalty, l_tads1, l_tads2): """ creates empty matrix """ zeros = [0.0 for _ in range(l_tads2)] return [[penalty * j for j in range(l_tads2)]] + \ [[penalty * i] + zeros for i in range(1, l_tads1)] def _equal(a, b, cut_off=1e-9): """ """ return abs(a-b) < cut_off