#!/usr/bin/env python
# -*- coding: utf-8 -*-
# doctest: +NORMALIZE_WHITESPACE
# doctest: +SKIP
'''This module provides functions for PCR. Primers with 5' tails as well as inverse PCR on
circular templates are handled correctly.
'''
import itertools
import math
import re
import textwrap
import copy
import operator
import os
from math import log10
from Bio.Seq import Seq
from Bio.Alphabet.IUPAC import ambiguous_dna
from Bio.SeqRecord import SeqRecord
from Bio.SeqUtils import GC
from Bio.SeqUtils.CheckSum import seguid
from Bio.SeqUtils.MeltingTemp import Tm_staluc
from Bio.SeqFeature import SeqFeature
from Bio.SeqFeature import CompoundLocation
from Bio.SeqFeature import FeatureLocation
from pydna.dsdna import rc
from pydna.dsdna import parse
from pydna.dsdna import Dseqrecord
from pydna.pretty import pretty_str
import shelve
def _annealing_positions(primer, template, limit=15):
'''Finds the annealing position(s) for a primer on a template where the
primer anneals perfectly with at least limit nucleotides in the 3' part.
start is a position (integer)
footprint1 and tail1 are strings.
::
<- - - - - - - - - - template - - - - - - - - - - - - - >
<------------- start ---->
5'-...gctactacacacgtactgactgcctccaagatagagtcagtaaccacactcgat...3'
||||||||||||||||||||||||||||||||||||||||||||||||
3'-gttctatctcagtcattggtgtATAGTG-5'
<tail>
<---footprint----->
<--------- primer ------>
Parameters
----------
primer : string
The primer sequence 5'-3'
template : string
The template sequence 5'-3'
limit : int = 15, optional
footprint needs to be at least of length limit.
Returns
-------
describe : list of tuples (int, string, string)
[ (start1, footprint1, tail1), (start2, footprint2, tail2),..., ]
'''
if len(primer)<limit:
return []
prc = rc(primer)
head = prc[:limit].upper()
table = {"R":"(A|G)",
"Y":"(C|T)",
"S":"(G|C)",
"W":"(A|T)",
"K":"(G|T)",
"M":"(A|C)",
"B":"(C|G|T)",
"D":"(A|G|T)",
"H":"(A|C|T)",
"V":"(A|C|G)",
"N":"(A|G|C|T)"}
for key in table:
head=head.replace(key, table[key])
positions = [m.start() for m in re.finditer('(?={})'.format(head), template, re.I)]
if positions:
tail = prc[limit:]
length = len(tail)
results = []
for match_start in positions:
tm = template[match_start+limit:match_start+limit+length]
footprint = rc(template[match_start:match_start+limit]+"".join([b for a,b in itertools.takewhile(lambda x: x[0].lower()==x[1].lower(), zip(tail, tm))]))
results.append((match_start, footprint, primer[: len(primer) - len(footprint) ]))
return results
return []
[docs]class Primer(SeqRecord):
'''This class holds information about a primer on a template '''
def __init__(self,
seq_obj,
position,
footprint,
tail):
self.position = position
self.footprint = footprint
self.tail = tail
seq_obj.seq.alphabet = ambiguous_dna
SeqRecord.__init__(self,
seq=seq_obj.seq,
id=seq_obj.id,
name=seq_obj.name,
description=seq_obj.description,
dbxrefs=seq_obj.dbxrefs,
features=seq_obj.features,
annotations=seq_obj.annotations,
letter_annotations=seq_obj.letter_annotations)
[docs]class Amplicon(Dseqrecord):
'''The Amplicon class holds information about a PCR reaction involving two
primers and one template. This class is used by the Anneal class and is not
meant to be instantiated directly.
Parameters
----------
forward_primer : SeqRecord(Biopython)
SeqRecord object holding the forward (sense) primer
reverse_primer : SeqRecord(Biopython)
SeqRecord object holding the reverse (antisense) primer
template : Dseqrecord
Dseqrecord object holding the template (circular or linear)
saltc : float, optional
saltc = monovalent cations (mM) (Na,K..)
default value is 50mM
This is used for Tm calculations.
forward_primer_concentration : float, optional
primer concentration (nM)
default set to 1000nM = 1µM
This is used for Tm calculations.
rc : float, optional
primer concentration (nM)
default set to 1000nM = 1µM
This is used for Tm calculations.
'''
def __init__( self,
record,
template=None,
forward_primer=None,
reverse_primer=None,
saltc=None,
forward_primer_concentration=None,
reverse_primer_concentration=None,
*args,
**kwargs):
#Dseqrecord.__init__(self,record,*args,**kwargs)
super(Amplicon, self).__init__(record, *args, **kwargs)
self.template = template
self.forward_primer = forward_primer
self.reverse_primer = reverse_primer
self.fwd_primer = self.forward_primer
self.rev_primer = self.reverse_primer
self.forward_primer_concentration = forward_primer_concentration
self.reverse_primer_concentration = reverse_primer_concentration
self.saltc = saltc
self.tmf = Tm_staluc(str(self.forward_primer.footprint),dnac=50, saltc=self.saltc)
self.tmr = Tm_staluc(str(self.reverse_primer.footprint),dnac=50, saltc=self.saltc)
self.tmf_dbd = tmbresluc(str(self.forward_primer.footprint),primerc=self.forward_primer_concentration)
self.tmr_dbd = tmbresluc(str(self.reverse_primer.footprint),primerc=self.reverse_primer_concentration)
def __getitem__(self, sl):
answer = copy.copy(self)
answer.seq = answer.seq.__getitem__(sl)
answer.seq.alphabet = self.seq.alphabet
sr = SeqRecord("n"*len(self))
sr.features = self.features
answer.features = SeqRecord.__getitem__(sr, sl).features
return answer
def __repr__(self):
'''returns a short string representation of the object'''
return "Amplicon({})".format(self.__len__())
[docs] def flankup(self, flankuplength=50):
'''Returns a Dseqrecord object containing flankuplength bases upstream of the forward primer footprint,
Truncated if the template is not long enough.
::
<--- flankup --->
5TAATAAactactgactatct3
||||||||||||||
acgcattcagctactgtactactgactatctatcg
'''
return self.template.seq[self.forward_primer.position-flankuplength-len(self.forward_primer.footprint):self.forward_primer.position-len(self.forward_primer.footprint)]
[docs] def flankdn(self, flankdnlength=50):
'''Returns a Dseqrecord object containing flankdnlength bases downstream of the reverse primer footprint.
Truncated if the template is not long enough.
::
<---- flankdn ------>
3actactgactatctTAATAA5
||||||||||||||
acgcattcagctactgtactactgactatctatcgtacatgtactatcgtat
'''
return self.template.seq[self.reverse_primer.position+len(self.reverse_primer.footprint):self.reverse_primer.position+flankdnlength+len(self.reverse_primer.footprint)]
[docs] def program(self):
'''Returns a string containing a text representation of a proposed
PCR program using Taq or similar polymerase.
::
Taq (rate 30 nt/s)
Three-step| 30 cycles | |SantaLucia 1998
94.0°C |94.0°C | |SaltC 50mM
__________|_____ 72.0°C |72.0°C|
04min00s |30s \ ________|______|
| \ 46.0°C/ 0min 1s|10min |
| \_____/ | |
| 30s | |4-8°C
'''
# Ta calculation according to
# Rychlik, Spencer, and Rhoads, 1990, Optimization of the anneal
# ing temperature for DNA amplification in vitro
# http://www.ncbi.nlm.nih.gov/pubmed/2003928
GC_prod=GC(str(self.seq))
tml = min(self.tmf,self.tmr)
#print GC(str(self.product.seq)), self.saltc/1000.0, len(self.product)
tmp = 81.5 + 0.41*GC(str(self.seq)) + 16.6*log10(self.saltc/1000.0) - 675/len(self)
ta = 0.3*tml+0.7*tmp-14.9
# Fermentas recombinant taq
taq_extension_rate = 30 # seconds/kB PCR product length
extension_time_taq = taq_extension_rate * len(self) / 1000 # seconds
f = textwrap.dedent(u'''
Taq (rate {rate} nt/s) 35 cycles |{size}bp
95.0°C |95.0°C | |SantaLucia 1998
|_________|_____ 72.0°C |72.0°C|SaltC {saltc:2}mM
| 03min00s|30s \ ________|______|
| | \ {ta}°C/{0:2}min{1:2}s| 5min |
| | \_____/ | |
| | 30s | |4-12°C'''.format(rate = taq_extension_rate,
ta = math.ceil(ta),
saltc = self.saltc,
*divmod(extension_time_taq,60),
size = len(self.seq)))
return pretty_str(f)
[docs] def taq_program(self):
return self.program()
[docs] def dbd_program(self):
'''Returns a string containing a text representation of a proposed
PCR program using a polymerase with a DNA binding domain such as Pfu-Sso7d.
::
Pfu-Sso7d (rate 15s/kb)
Three-step| 30 cycles | |Breslauer1986,SantaLucia1998
98.0°C |98.0°C | |SaltC 50mM
__________|_____ 72.0°C |72.0°C|Primer1C 1µM
00min30s |10s \ 61.0°C ________|______|Primer2C 1µM
| \______/ 0min 0s|10min |
| 10s | |4-8°C
'''
PfuSso7d_extension_rate = 15 #seconds/kB PCR product
extension_time_PfuSso7d = PfuSso7d_extension_rate * len(self) / 1000 # seconds
# Ta calculation for enzymes with dsDNA binding domains like Pfu-Sso7d
# https://www.finnzymes.fi/tm_determination.html
length_of_f = len(self.forward_primer.footprint)
length_of_r = len(self.reverse_primer.footprint)
if (length_of_f>20 and length_of_r>20 and self.tmf_dbd>=69.0 and self.tmr_dbd>=69.0) or (self.tmf_dbd>=72.0 and self.tmr_dbd>=72.0):
f=textwrap.dedent( '''
Pfu-Sso7d (rate {rate}s/kb)
Two-step| 30 cycles | |{size}bp
98.0°C |98.0C | |Breslauer1986,SantaLucia1998
_____ __|_____ | |SaltC {saltc:2}mM
00min30s|10s \ 72.0°C|72.0°C|Primer1C {forward_primer_concentration:3}µM
| \_______|______|Primer2C {reverse_primer_concentration:3}µM
| {0:2}min{1:2}s|10min |4-8°C
'''.format(rate = PfuSso7d_extension_rate,
forward_primer_concentration = self.forward_primer_concentration,
reverse_primer_concentration = self.rc,
saltc = self.saltc,
*divmod(extension_time_PfuSso7d,60),
size = len(self.seq)))
else:
if (length_of_f>20 and length_of_r>20):
ta = min(self.tmf_dbd,self.tmr_dbd)+3
else:
ta = min(self.tmf_dbd,self.tmr_dbd)
f=textwrap.dedent( '''
Pfu-Sso7d (rate {rate}s/kb)
Three-step| 30 cycles | |Breslauer1986,SantaLucia1998
98.0°C |98.0°C | |SaltC {saltc:2}mM
__________|_____ 72.0°C |72.0°C|Primer1C {forward_primer_concentration:3}µM
00min30s |10s \ {ta}°C ________|______|Primer2C {reverse_primer_concentration:3}µM
| \______/{0:2}min{1:2}s|10min |
| 10s | |4-8°C
'''.format(rate = PfuSso7d_extension_rate,
ta = math.ceil(ta),
forward_primer_concentration = self.forward_primer_concentration/1000,
reverse_primer_concentration = self.reverse_primer_concentration/1000,
saltc= self.saltc,
*divmod(extension_time_PfuSso7d,60)))
return pretty_str(f)
[docs]class Anneal(object):
u'''
Parameters
----------
primers : iterable containing SeqRecord objects
Primer sequences 5'-3'.
template : Dseqrecord object
The template sequence 5'-3'.
limit : int, optional
limit length of the annealing part of the primers.
Attributes
----------
products: list
A list of Amplicon objects, one for each primer pair that may form a PCR product.
Examples
--------
>>> import pydna
>>> template = pydna.Dseqrecord("tacactcaccgtctatcattatctactatcgactgtatcatctgatagcac")
>>> from Bio.SeqRecord import SeqRecord
>>> p1 = pydna.read(">p1\\ntacactcaccgtctatcattatc", ds = False)
>>> p2 = pydna.read(">p2\\ngtgctatcagatgatacagtcg", ds = False)
>>> ann = pydna.Anneal((p1, p2), template)
>>> print ann.report()
Template <unknown name> 51 nt linear:
Primer p1 anneals forward at position 23
<BLANKLINE>
Primer p2 anneals reverse at position 29
>>> ann.products
[Amplicon(51)]
>>> amplicon_list = ann.products
>>> amplicon = amplicon_list.pop()
>>> amplicon
Amplicon(51)
>>> print amplicon.figure()
5tacactcaccgtctatcattatc...cgactgtatcatctgatagcac3
|||||||||||||||||||||| tm 50.6 (dbd) 60.5
3gctgacatagtagactatcgtg5
5tacactcaccgtctatcattatc3
||||||||||||||||||||||| tm 49.4 (dbd) 58.8
3atgtgagtggcagatagtaatag...gctgacatagtagactatcgtg5
>>> amplicon.annotations['date'] = '02-FEB-2013' # Set the date for this example to pass the doctest
>>> print amplicon
Dseqrecord
circular: False
size: 51
ID: 51bp U96+TO06Y6pFs74SQx8M1IVTBiY
Name: 51bp_PCR_prod
Description: Product_p1_p2
Number of features: 4
/date=02-FEB-2013
Dseq(-51)
taca..gcac
atgt..cgtg
>>> print amplicon.program()
<BLANKLINE>
Taq (rate 30 nt/s) 35 cycles |51bp
95.0°C |95.0°C | |SantaLucia 1998
|_________|_____ 72.0°C |72.0°C|SaltC 50mM
| 03min00s|30s \ ________|______|
| | \ 44.0°C/ 0min 1s| 5min |
| | \_____/ | |
| | 30s | |4-12°C
>>>
'''
def __init__( self,
primers,
template,
limit=13):
key = str(template.seguid()) + "|".join(sorted([seguid(p.seq) for p in primers]))+str(limit)
cache = shelve.open(os.path.join(os.environ["datadir"], "amplify.shelf"), protocol=2, writeback=False)
refresh = False
cached = None
primers = [p for p in primers if p.seq]
if os.environ["pydna_cache"] in ("compare", "cached"):
try:
cached = cache[key]
except KeyError:
if os.environ["pydna_cache"] == "compare":
raise Exception("no result for this key!")
else:
refresh = True
if refresh or os.environ["pydna_cache"] in ("compare", "refresh", "nocache"):
self.key = key
self.template = copy.deepcopy(template)
#self.template.features = copy.deepcopy(template.features)
self.limit = limit
self._products = None
self.fwd_primers = []
self.rev_primers = []
twl = len(self.template.seq.watson)
tcl = len(self.template.seq.crick)
if self.template.linear:
tw = self.template.seq.watson
tc = self.template.seq.crick
else:
tw = self.template.seq.watson+self.template.seq.watson
tc = self.template.seq.crick +self.template.seq.crick
for p in primers:
self.fwd_primers.extend((Primer(p,
tcl-pos - min(self.template.seq.ovhg, 0),
Seq(fp), Seq(tl))
for pos, fp, tl in _annealing_positions(
str(p.seq),
tc,
limit) if pos<tcl))
self.rev_primers.extend((Primer(p,
pos + max(0, self.template.seq.ovhg),
Seq(fp), Seq(tl))
for pos, fp, tl in _annealing_positions(
str(p.seq),
tw,
limit) if pos<twl))
self.fwd_primers.sort(key = operator.attrgetter('position'))
self.rev_primers.sort(key = operator.attrgetter('position'), reverse=True)
for fp in self.fwd_primers:
if fp.position-len(fp.footprint)>=0:
start = fp.position-len(fp.footprint)
end = fp.position
self.template.features.append(SeqFeature(FeatureLocation(start, end),
type ="primer_bind",
strand = 1,
qualifiers = {"note":[fp.name],
"ApEinfo_fwdcolor":["green"],
"ApEinfo_revcolor":["red"]}))
else:
start = len(self.template)-len(fp.footprint)+fp.position
end = start+len(fp.footprint)-len(self.template)
sf=SeqFeature(CompoundLocation([FeatureLocation(start,len(self.template)),
FeatureLocation(0, end)]),
type="primer_bind",
location_operator="join",
qualifiers = {"note":[fp.name]})
self.template.features.append(sf)
for rp in self.rev_primers:
if rp.position+len(rp.footprint)<=len(self.template):
start = rp.position
end = rp.position + len(rp.footprint)
self.template.features.append(SeqFeature(FeatureLocation(start,end),
type ="primer_bind",
strand = -1,
qualifiers = {"note":[rp.name],
"ApEinfo_fwdcolor":["green"],
"ApEinfo_revcolor":["red"]}))
else:
start = rp.position
end = rp.position+len(rp.footprint)-len(self.template)
self.template.features.append(SeqFeature(CompoundLocation([FeatureLocation(start,len(self.template)),
FeatureLocation(0,end)]),
type ="primer_bind",
location_operator= "join",
strand = -1,
qualifiers = {"note":[rp.name]}))
self.forward_primers = self.fwd_primers
self.reverse_primers = self.rev_primers
if os.environ["pydna_cache"] == "compare":
self._compare(cached)
if refresh or os.environ["pydna_cache"] == "refresh":
self._save()
elif cached and os.environ["pydna_cache"] not in ("nocache","refresh"):
for key, value in cached.__dict__.items():
setattr(self, key, value )
cache.close()
def _compare(self, cached):
if str(self) != str(cached):
module_logger.warning('amplify error')
def _save(self):
cache = shelve.open(os.path.join(os.environ["datadir"], "amplify.shelf"), protocol=2, writeback=False)
cache[self.key] = self
cache.close()
@property
[docs] def products(self):
if self._products:
return self._products
self._products = []
for fp in self.fwd_primers:
for rp in self.rev_primers:
if self.template.circular and fp.position>rp.position:
tmpl = self.template.shifted(fp.position-len(fp.footprint))
tmpl = tmpl._multiply_circular(2)
tmpl = tmpl[:len(self.template) - (fp.position - rp.position) + len(rp.footprint) + len(fp.footprint)]
#print len(self.template) - (fp.position - rp.position) + len(rp.footprint) + len(fp.footprint)
#print len(tmpl)
elif self.template.circular:
tmpl = self.template._multiply_circular(3)
tmpl = tmpl[fp.position-len(fp.footprint)+len(self.template):rp.position+len(rp.footprint)+len(self.template)]
else:
tmpl = self.template[fp.position-len(fp.footprint):rp.position+len(rp.footprint)]
prd = ( Dseqrecord(fp.tail) + tmpl + Dseqrecord(rp.tail).reverse_complement())
prd.add_feature( 0, len(fp), label=fp.id)
prd.add_feature( len(prd)-len(rp),len(prd),label=rp.id, strand=-1)
#prd.seq = fp.seq+tmpl.seq[len(fp.footprint):len(tmpl)-len(rp.footprint)]+rp.seq.reverse_complement()
#features = tmpl[fp.position-len(fp.footprint):rp.position+len(rp.footprint)].features
#print fp.position-len(fp.footprint), rp.position+len(rp.footprint), features
#print "<<<<<<<<<<<<<<",features
#prd.features = [f._shift(len(fp.tail)) for f in features]
# description = Genbank LOCUS max 16 chars
prd.name = "{0}bp_PCR_prod".format(len(prd))[:16]
prd.id = "{0}bp {1}".format( str(len(prd))[:14], prd.seguid() )
prd.description="Product_{0}_{1}".format( fp.description,
rp.description)
self._products.append( Amplicon(prd,
template=tmpl,
forward_primer=fp,
reverse_primer=rp,
saltc=50,
forward_primer_concentration=1000,
reverse_primer_concentration=1000))
assert " " not in str(prd.seq.watson)
assert " " not in str(prd.seq.crick)
return self._products
[docs] def report(self):
'''This method is an alias of str(Annealobj).
Returns a short string representation.
'''
return self.__str__()
def __repr__(self):
''' returns a short string representation '''
return "Reaction(products = {})".format(len(self.fwd_primers*len(self.rev_primers)))
def __str__(self):
'''returns a short report describing if or where primer
anneal on the template.
'''
mystring = "Template {name} {size} nt {top}:\n".format(name=self.template.name,
size=len(self.template),
top={True:"circular",
False:"linear"}[self.template.circular]
)
if self.fwd_primers:
for p in self.fwd_primers:
mystring += "Primer {name} anneals forward at position {pos}\n".format(name=p.name, pos=p.position)
else:
mystring += "No forward primers anneal...\n"
mystring +="\n"
if self.rev_primers:
for p in self.rev_primers:
mystring += "Primer {name} anneals reverse at position {pos}\n".format(name=p.name, pos=p.position)
else:
mystring += "No reverse primers anneal...\n"
return mystring.strip()
[docs]def pcr(*args, **kwargs):
'''pcr is a convenience function for Anneal to simplify its usage,
especially from the command line. If more than one PCR product is
formed, an exception is raised.
args is any iterable of sequences or an iterable of iterables of sequences.
args will be greedily flattened.
Parameters
----------
args : iterable containing sequence objects
Several arguments are also accepted.
limit : int = 13, optional
limit length of the annealing part of the primers.
Notes
-----
sequences in args could be of type:
* string
* Seq
* SeqRecord
* Dseqrecord
The last sequence will be interpreted as the template
all preceeding sequences as primers.
This is a powerful function, use with care!
Returns
-------
product : Dseqrecord
a Dseqrecord object representing the PCR product.
The direction of the PCR product will be the same as
for the template sequence.
Examples
--------
>>> import pydna
>>> template = pydna.Dseqrecord("tacactcaccgtctatcattatctactatcgactgtatcatctgatagcac")
>>> from Bio.SeqRecord import SeqRecord
>>> p1 = pydna.read(">p1\\ntacactcaccgtctatcattatc", ds = False)
>>> p2 = pydna.read(">p2\\ncgactgtatcatctgatagcac", ds = False).reverse_complement()
>>> pydna.pcr(p1, p2, template)
Amplicon(51)
>>> pydna.pcr([p1, p2], template)
Amplicon(51)
>>> pydna.pcr((p1,p2,), template)
Amplicon(51)
>>>
'''
from Bio.SeqRecord import SeqRecord
# flatten args
output = []
stack = []
stack.extend(reversed(args))
while stack:
top = stack.pop()
if hasattr(top, "__iter__") and not isinstance(top, SeqRecord):
stack.extend(reversed(top))
else:
output.append(top)
new=[]
for s in output:
if isinstance(s, Seq):
s = SeqRecord(s)
elif isinstance(s, SeqRecord):
pass
elif hasattr(s, "watson"):
s=s.watson
elif isinstance(s, basestring):
s = SeqRecord(Seq(s))
else:
raise TypeError("the record property needs to be a string, a Seq object or a SeqRecord object")
new.append(s)
anneal_primers = Anneal( new[:-1],
new[-1],
**kwargs)
if anneal_primers:
if len(anneal_primers.products) == 1:
return anneal_primers.products.pop()
elif len(anneal_primers.products) == 0:
raise Exception("No PCR products! {}".format(anneal_primers.report()))
else:
raise Exception("PCR not specific! {}".format(anneal_primers.report()))
else:
raise Exception(anneal_primers.report())
return
[docs]def nopcr(*args, **kwargs):
'''pcr is a convenience function for Anneal to simplify its usage,
especially from the command line. If more one or more PCR products are
formed, an exception is raised.
args is any iterable of sequences or an iterable of iterables of sequences.
args will be greedily flattened.
Parameters
----------
args : iterable containing sequence objects
Several arguments are also accepted.
limit : int = 13, optional
limit length of the annealing part of the primers.
Notes
-----
sequences in args could be of type:
* string
* Seq
* SeqRecord
* Dseqrecord
The last sequence will be interpreted as the template
all preceeding sequences as primers.
This is a powerful function, use with care!
Returns
-------
product : Dseqrecord
a Dseqrecord object representing the PCR product.
The direction of the PCR product will be the same as
for the template sequence.
Examples
--------
>>> import pydna
>>> template = pydna.Dseqrecord("tacactcaccgtctatcattatctactatcgactgtatcatctgatagcac")
>>> from Bio.SeqRecord import SeqRecord
>>> from Bio.Seq import Seq
>>> p1 = SeqRecord(Seq("tacactcaccgtctatcattatc"))
>>> p2 = SeqRecord(Seq("gtgctatcagatgatacagtG")) # This primer does not anneal
>>> pydna.nopcr(p1, p2, template)
True
'''
from Bio.SeqRecord import SeqRecord
# flatten args
output = []
stack = []
stack.extend(reversed(args))
while stack:
top = stack.pop()
if hasattr(top, "__iter__") and not isinstance(top, SeqRecord):
stack.extend(reversed(top))
else:
output.append(top)
new=[]
for s in output:
if isinstance(s, Seq):
s = SeqRecord(s)
elif isinstance(s, SeqRecord):
pass
elif hasattr(s, "watson"):
s=s.watson
elif isinstance(s, basestring):
s = SeqRecord(Seq(s))
else:
raise TypeError("the record property needs to be a string, a Seq object or a SeqRecord object")
new.append(s)
anneal_primers = Anneal( new[:-1],
new[-1],
**kwargs)
if anneal_primers:
if len(anneal_primers.products) != 0:
raise Exception("PCR products formed! {}".format(anneal_primers.report()))
else:
raise Exception(anneal_primers.report())
return True
[docs]def basictm(primer, *args, **kwargs):
'''Returns the melting temperature (Tm) of the primer using
the basic formula.
| Tm = (wA+xT)*2 + (yG+zC)*4 assumed 50mM monovalent cations
|
| w = number of A in primer
| x = number of T in primer
| y = number of G in primer
| z = number of C in primer
Parameters
----------
primer : string
Primer sequence 5'-3'
Returns
-------
tm : int
Examples
--------
>>> from pydna.amplify import basictm
>>> basictm("ggatcc")
20
>>>
'''
primer = str(primer).lower()
return (primer.count("a") + primer.count("t"))*2 + (primer.count("g") + primer.count("c"))*4
# http://www.promega.com/techserv/tools/biomath/calc11.htm#melt_results
[docs]def tmstaluc98(primer, dnac=50, saltc=50, **kwargs):
'''Returns the melting temperature (Tm) of the primer using
the nearest neighbour algorithm. Formula and thermodynamic data
is taken from SantaLucia 1998 [1]_. This implementation gives the same
answer as the one provided by Biopython (See Examples).
Thermodynamic data used:
===== ==== ====
pair dH dS
===== ==== ====
AA/TT 7.9 22.2
AT/TA 7.2 20.4
TA/AT 7.2 21.3
CA/GT 8.5 22.7
GT/CA 8.4 22.4
CT/GA 7.8 21.0
GA/CT 8.2 22.2
CG/GC 10.6 27.2
GC/CG 9.8 24.4
GG/CC 8.0 19.9
===== ==== ====
Parameters
----------
primer : string
Primer sequence 5'-3' in UPPERCASE
Returns
-------
tm : float
tm of the primer
Examples
--------
>>> from pydna.amplify import tmstaluc98
>>> from Bio.SeqUtils.MeltingTemp import Tm_staluc
>>> tmstaluc98("ACGTCATCGACACTATCATCGAC")
54.55597724052518
>>>
'''
nntermsl={ "AA": (7.9 , 22.2),
"TT": (7.9 , 22.2),
"AT": (7.2 , 20.4),
"TA": (7.2 , 21.3),
"CA": (8.5 , 22.7),
"TG": (8.5 , 22.7),
"GT": (8.4 , 22.4),
"AC": (8.4 , 22.4),
"CT": (7.8 , 21.0),
"AG": (7.8 , 21.0),
"GA": (8.2 , 22.2),
"TC": (8.2 , 22.2),
"CG": (10.6 , 27.2),
"GC": (9.8 , 24.4),
"GG": (8 , 19.9),
"CC": (8 , 19.9),
"A" : (0 , 0 ),
"C" : (0 , 0 ),
"G" : (0 , 0 ),
"T" : (0 , 0 ) }
helixinit = { "G": (-0.1 ,2.8),
"C": (-0.1 ,2.8),
"A": (-2.3, -4.1),
"T": (-2.3, -4.1) }
dH, dS = helixinit[primer[0]]
H , S = helixinit[primer[-1]]
dH = dH+H
dS = dS+S
for p in range(len(primer)):
dn = primer[p:p+2]
H,S = nntermsl[dn]
dH+=H
dS+=S
R = 1.987 # universal gas constant in Cal/degrees C*Mol
k = (dnac/4.0)*1e-9
dS = dS-0.368*(len(primer)-1)*math.log(float(saltc)/1e3)
tm = ((1000* (-dH))/(-dS+(R * (math.log(k)))))-273.15
return tm
[docs]def tmbreslauer86(primer, dnac=500.0, saltc=50,thermodynamics=False):
'''Returns the melting temperature (Tm) of the primer using
the nearest neighbour algorithm. Formula and thermodynamic data
is taken from Breslauer 1986. These data are no longer widely used.
Breslauer 1986, table 2 [#]_
===== ===== ==== ===
pair dH dS dG
===== ===== ==== ===
AA/TT 9.1 24.0 1.9
AT/TA 8.6 23.9 1.5
TA/AT 6.0 16.9 0.9
CA/GT 5.8 12.9 1.9
GT/CA 6.5 17.3 1.3
CT/GA 7.8 20.8 1.6
GA/CT 5.6 13.5 1.6
CG/GC 11.9 27.8 3.6
GC/CG 11.1 26.7 3.1
GG/CC 11.0 26.6 3.1
===== ===== ==== ===
Parameters
----------
primer : string
Primer sequence 5'-3' in UPPERCASE
Returns
-------
tm : float
References
----------
.. [#] K.J. Breslauer et al., “Predicting DNA Duplex Stability from the Base Sequence,” Proceedings of the National Academy of Sciences 83, no. 11 (1986): 3746.
Examples
---------
>>> from pydna.amplify import tmbreslauer86
>>> tmbreslauer86("ACGTCATCGACACTATCATCGAC")
64.28863985851899
'''
nntermbr={ "AA": (9.1 ,24.0 ,1.9),
"TT": (9.1 ,24.0 ,1.9),
"AT": (8.6 ,23.9 ,1.5),
"TA": (6.0 ,16.9 ,0.9),
"CA": (5.8 ,12.9 ,1.9),
"TG": (5.8 ,12.9 ,1.9),
"GT": (6.5 ,17.3 ,1.3),
"AC": (6.5 ,17.3 ,1.3),
"CT": (7.8 ,20.8 ,1.6),
"AG": (7.8 ,20.8 ,1.6),
"GA": (5.6 ,13.5 ,1.6),
"TC": (5.6 ,13.5 ,1.6),
"CG": (11.9 ,27.8 ,3.6),
"GC": (11.1 ,26.7 ,3.1),
"GG": (11.0 ,26.6 ,3.1),
"CC": (11.0 ,26.6 ,3.1),
"A" : (0 , 0 ,0),
"C" : (0 , 0 ,0),
"G" : (0 , 0 ,0),
"T" : (0 , 0 ,0), }
dH=3.4
dS=12.4
dG=0
for p in range(len(primer)):
dn = primer[p:p+2]
H,S,G = nntermbr[dn]
dG+=G
dH+=H
dS+=S
R = 1.9872 # universal gas constant in Cal/degrees C*Mol
k = dnac*1E-9/2.0
dH = dH - 5
dS = dS-0.368*(len(primer)-1)*math.log(float(saltc)/1E3) # SantaLucia salt correction formula
tm = 1000 * -dH /(-dS + R * math.log(k) ) - 273.15 # degrees Celsius
if thermodynamics:
return tm,dH,dS
else:
return tm
[docs]def tmbresluc(primer, primerc=500.0, saltc=50, thermodynamics=False):
'''Returns the tm for a primer using a formula adapted to polymerases
with a DNA binding domain.
Parameters
----------
primer : string
primer sequence 5'-3'
primerc : float
concentration (nM)
saltc : float, optional
Monovalent cation concentration (mM)
thermodynamics : bool, optional
prints details of the thermodynamic data to stdout. For
debugging only.
Returns
-------
tm : float
the tm of the primer
tm,dH,dS : tuple
tm and dH and dS used for the calculation
'''
import thermodynamic_data
saltc = float(saltc)/1000
pri = primerc/10E7
dS = -12.4
dH = -3400
STR = primer.lower();
for i in range(len(STR)-1):
n1=ord(STR[i])
n2=ord(STR[i+1])
dH += thermodynamic_data.dHBr[n1 - 97][n2 - 97]
dS += thermodynamic_data.dSBr[n1 - 97][n2 - 97]
tm = (dH / (1.9872 * math.log(pri / 1600) + dS) + (16.6 * math.log(saltc)) / math.log(10)) - 273.15
if thermodynamics:
return tm,dH,dS
else:
return tm
if __name__=="__main__":
import doctest
doctest.testmod()