pommevilla / MetaFunPrimer

This Stack Overflow answer should be informative, but I would like to avoid using BioPython in order to limit dependencies.

Also, while we're doing this, look into refactoring the lines below so that they use a single re.finditer expression instead of two:

MetaFunPrimer/src/get_pcr_product.py

Lines 66 to 86 in f1f6438

    
           for x in fpri:  # x is the key of the dict fpri, x is the sequence 
        
               ma = [m.start() for m in re.finditer(x, se)]         
        
               if len(ma) > 0: 
        
                   for st in ma: 
        
                       tempseq = se[st:] 
        
                       for y in rpri: 
        
                           rma = [m.start() for m in re.finditer(y, tempseq)] 
        
                           if len(rma) > 0: 
        
                               # build an array based on x's forward primer cluster ID, [c001, c002, ...] 
        
                               fpri_li = fpri[x].split(",") 
        
                               for i, ff in enumerate(fpri_li): 
        
                                   f = ff.split(".")[-2] 
        
                                   fpri_li[i] = f 
        
                                   #print(fpri_li) 
        
                               # build an array based on y's reverse primer cluster ID, [c001, c002, ...] 
        
                               rpri_li = rpri[y].split(",") 
        
                               for j, rr in enumerate(rpri_li): 
        
                                   r = rr.split(".")[-2] 
        
                                   rpri_li[j] = r 
        
                               #print(rpri_li)

An example from the ISqPCR code I wrote for another app:

    import re
    forward_primer = replace_ambiguous_bases(forward_primer)
    reverse_primer = reverse_complement(replace_ambiguous_bases(reverse_primer))
    primer_pattern = re.compile('({}).*({})'.format(forward_primer, reverse_primer))
    # for match in [match for match in re.finditer(primer_pattern, target_sequence)]:
    for match in re.finditer(primer_pattern, target_sequence):
        product = target_sequence[match.start():match.end()]
        return '{}\t{}\t{}\t{}\t{}\t{}\n'.format(primer_name, target_name, match.start(), match.end(),
                                                 match.end() - match.start(), product)

	for x in fpri: # x is the key of the dict fpri, x is the sequence
	ma = [m.start() for m in re.finditer(x, se)]
	if len(ma) > 0:
	for st in ma:
	tempseq = se[st:]
	for y in rpri:
	rma = [m.start() for m in re.finditer(y, tempseq)]
	if len(rma) > 0:
	# build an array based on x's forward primer cluster ID, [c001, c002, ...]
	fpri_li = fpri[x].split(",")
	for i, ff in enumerate(fpri_li):
	f = ff.split(".")[-2]
	fpri_li[i] = f
	#print(fpri_li)

	# build an array based on y's reverse primer cluster ID, [c001, c002, ...]
	rpri_li = rpri[y].split(",")
	for j, rr in enumerate(rpri_li):
	r = rr.split(".")[-2]
	rpri_li[j] = r
	#print(rpri_li)

get_pcr_product.py: allow 1 mismatch when evaluating primer amplification