Source code for dnachisel.Specification

"""Base class for specification.

Notable features implemented here:

- Many empty methods that specification subclasses will override
- Feature import/export from/to Genbank features.
"""
import copy
import re

from .biotools import find_specification_in_feature
from .Location import Location
from Bio.SeqFeature import SeqFeature


class Specification:
    """General class to define specifications to optimize.

    Note that all specifications have a ``boost`` attribute that is a
    multiplicator that will be used when computing the global specification
    score of a problem with ``problem.all_objectives_score()``.

    New types of specifications are defined by subclassing ``Specification``
    and providing a custom ``evaluate`` and ``localized`` methods.

    Parameters
    -----------
    evaluate
      function (sequence) => SpecEvaluation

    boost
      Relative importance of the Specification's score in a
      multi-specification problem.
    """

    # Class-level defaults. ``optimize_passively`` is switched on for copies
    # created by ``as_passive_objective()``; the other flags are presumably
    # overridden by subclasses (not visible from this class alone).
    best_possible_score = None
    optimize_passively = False
    enforced_by_nucleotide_restrictions = False
    priority = 0
    is_void = False

    def __init__(self, evaluate=None, boost=1.0):
        """Initialize.

        ``evaluate`` is only stored on the instance when provided, so that
        subclasses defining an ``evaluate`` method are not shadowed by None.
        """
        self.boost = boost
        if evaluate is not None:
            self.evaluate = evaluate

    def localized(self, location, problem=None):
        """Return a modified version of the specification for the case where
        sequence modifications are only performed inside the provided
        location.

        For instance if a specification concerns local GC content, and we are
        only making local mutations to destroy a restriction site, then we
        only need to check the local GC content around the restriction site
        after each mutation (and not compute it for the whole sequence), so
        ``EnforceGCContent.localized(location)`` will return a specification
        that only looks for GC content around the provided location.

        If a specification concerns a DNA segment that is completely disjoint
        from the provided location, this must return None. Otherwise it must
        return a specification object.

        The default implementation ignores both arguments and returns
        ``self`` unchanged.
        """
        return self

    def copy_with_changes(self, **kwargs):
        """Return a copy of the Specification with modified properties.

        For instance ``new_spec = spec.copy_with_changes(boost=10)``.

        The copy is shallow (``copy.copy``): attribute values are shared with
        the original unless they are replaced through ``kwargs``.
        """
        new_specification = copy.copy(self)
        new_specification.__dict__.update(kwargs)
        return new_specification

    def shifted(self, shift):
        """Shift the location of the specification.

        Some specification classes may have a special method to do side
        effects when shifting the location.

        Location shifting is used in particular when solving circular DNA
        optimization problems.

        Returns a copy whose ``location`` is ``self.location + shift`` (or
        None when the specification has no location) and whose
        ``derived_from`` attribute points back to ``self``.
        """
        # The base __init__ does not define ``location``, so a missing
        # attribute is treated the same way as ``location=None``.
        location = getattr(self, "location", None)
        new_location = None if location is None else location + shift
        return self.copy_with_changes(
            location=new_location, derived_from=self
        )

    def initialized_on_problem(self, problem, role="constraint"):
        """Complete specification initialization when the sequence gets known.

        Some specifications like to know what their role is and on which
        sequence they are employed before they complete some values.

        The default implementation returns ``self`` unchanged.
        """
        return self

    @staticmethod
    def from_biopython_feature(feature, specifications_dict):
        """Parse a Biopython feature and create a specification from it.

        The specifications_dict enables to map specification names to the
        actual implemented class.

        Parameters
        ----------
        feature
          Biopython feature whose label holds an expression such as
          ``@AvoidPattern(BsmBI_site)`` or ``~cds``, and whose location gives
          the specification's location.

        specifications_dict
          Dict of the form ``{specification_name: specification_class}``.

        Returns
        -------
        role, specification
          ``role`` is either "constraint" (label starting with ``@``) or
          "objective" (label starting with ``~``).

        Raises
        ------
        ValueError
          If no label is found in the feature or if the label does not follow
          the ``@Name(args)`` / ``~Name(args)`` syntax.

        TypeError
          If the specification name is not in ``specifications_dict`` or if
          the specification class rejects the parsed arguments.
        """
        # PARSE THE SPECIFICATION, IDENTIFY THE TYPE AND ARGUMENTS
        label = find_specification_in_feature(feature)
        if isinstance(label, list):
            label = label[0]
        if not isinstance(label, str):
            raise ValueError(
                "No specification label found in feature %s" % feature
            )
        if not label.endswith(")"):
            # Standardizes the expression: @cds => @cds()
            label += "()"
        # The regular expression below detects spec definitions:
        # ~Avoidpattern(ARGS) => ~, AvoidPattern, ARGS
        # The name stops at the first "(" so that parentheses appearing
        # inside the arguments are not swallowed into the name.
        pattern = r"([@~])([^\s(]+)(\(.*\))"
        match = re.match(pattern, label)
        if match is None:
            raise ValueError(
                "Could not parse the specification label %r" % label
            )
        role_symbol, specification, parameters = match.groups()
        if specification not in specifications_dict:
            raise TypeError("Unknown specification %s" % specification)
        specification_class = specifications_dict[specification]
        role = {"@": "constraint", "~": "objective"}[role_symbol]

        # PARSE THE ARGUMENTS AND KEYWORD ARGUMENTS

        def format_value(value):
            """Converts stringed integers and floats back to numerical.

            Also converts "'bla'" => "bla"
            If the value is a list, apply to all elements."""
            if isinstance(value, (list, tuple)):
                return [format_value(v) for v in value]
            quoted = re.match(r"'(.*)'", value)
            if quoted is not None:
                return quoted.groups()[0]
            try:
                return int(value)
            except ValueError:
                pass
            try:
                return float(value)
            except ValueError:
                return value

        args, kwargs = [], {}
        for arg in parameters[1:-1].split(", "):
            if arg == "":
                continue
            # Keyword arguments are written "key:value" or "key=value";
            # ":" takes precedence when both characters are present.
            separator = next((s for s in (":", "=") if s in arg), None)
            if separator is None:
                args.append(format_value(arg))
                continue
            # Split on the first separator only, so that a value which itself
            # contains the separator does not break the unpacking.
            key, value = arg.split(separator, 1)
            if "|" in value:
                value = value.split("|")
            kwargs[key] = format_value(value)
        kwargs["location"] = Location.from_biopython_location(
            feature.location
        )

        # ATTEMPT TO CREATE A SPECIFICATION WITH THE GIVEN TYPE AND ARGS
        try:
            specification_instance = specification_class(*args, **kwargs)
        except TypeError as err:
            message = str(err.args[0]) if err.args else ""
            if "unexpected keyword argument" in message and "'" in message:
                faulty_parameter = message.split("'")[1]
                raise TypeError(
                    "Unknown parameter %s for specification %s "
                    "at location %s"
                    % (faulty_parameter, specification, kwargs["location"])
                )
            # Any other TypeError (wrong number of positional arguments,
            # missing argument, error inside the constructor...) is reported
            # with its original message instead of being mislabeled.
            raise TypeError(
                "Could not create specification %s at location %s: %s"
                % (specification, kwargs["location"], message)
            )
        return role, specification_instance

    def label(
        self,
        role=None,
        with_location=True,
        assignment=":",
        use_short_form=False,
    ):
        """Return a string representation of the specification.

        Parameters
        ----------
        role
          Either "constraint" (prefix ``@``), "objective" (prefix ``~``) or
          None (no prefix). The prefix is only used in the long form.

        with_location
          If True, the location (when there is one) is added to the label.

        assignment
          String placed between the name and the value of each
          ``(name, value)`` tuple returned by ``label_parameters()``.

        use_short_form
          If True the label is built from ``short_label()``, else from the
          class name and ``label_parameters()``.
        """
        prefix = {"constraint": "@", "objective": "~", None: ""}[role]
        # Not every specification defines a location (the base __init__ does
        # not), so a missing attribute is handled like ``location=None``.
        location = getattr(self, "location", None)
        if use_short_form:
            label = self.short_label()
            if with_location and location is not None:
                label += ", %s" % location
            return label
        location_text = ""
        if with_location and location:
            location_text = "[%s]" % location
        params = self.label_parameters()
        params_text = ""
        if params:
            # Tuples are rendered "name<assignment>value"; any other item is
            # rendered with str() so non-string values cannot break join().
            params_text = "(%s)" % ", ".join(
                [
                    assignment.join(map(str, p))
                    if isinstance(p, tuple)
                    else str(p)
                    for p in params
                ]
            )
        return "".join(
            [prefix, self.__class__.__name__, location_text, params_text]
        )

    def short_label(self):
        """Shorter, less precise label to be used in tables, reports, etc.

        This is meant for specifications such as EnforceGCContent(0.4, 0.6)
        to be represented as '40-60% GC' in reports tables etc..

        By default this is the name of the class.
        """
        return self.__class__.__name__

    def label_parameters(self):
        """In subclasses, returns a list of the creation parameters.

        For instance [('pattern', 'ATT'), ('occurences', 2)]
        """
        return []

    def __str__(self):
        """By default, represent the Specification using its label()"""
        return self.label()

    def __repr__(self):
        """By default, represent the Specification using its label()"""
        return self.label()

    def to_biopython_feature(
        self,
        feature_type="misc_feature",
        role="constraint",
        colors_dict=None,
        use_short_label=True,
        **qualifiers
    ):
        """Return a Biopython feature representing the specification.

        The feature label is a string representation of the specification,
        and its location indicates the specification's scope.

        This method is primarily meant to display specifications in Genbank.
        They may result in "viable", DnaChisel-compatible annotations that
        can be imported back into DNA Chisel, but this is not the intended
        goal.

        Parameters
        ----------
        feature_type
          Type of the created Biopython feature.

        role
          Either "constraint" or "objective". Stored in the qualifiers and
          used to select the label prefix and the default color.

        colors_dict
          Dict ``{role: color}``, used when no "color" qualifier is given.

        use_short_label
          If True, the default label uses the specification's short form.

        qualifiers
          Other qualifiers of the feature. Provided "label" and "color"
          values take precedence over the computed ones.
        """
        if colors_dict is None:
            colors_dict = {"constraint": "#355c87", "objective": "#f9cd60"}
        qualifiers["role"] = role
        if "label" not in qualifiers:
            qualifiers["label"] = self.label(
                role=role,
                with_location=False,
                assignment=":",
                use_short_form=use_short_label,
            )
        if "color" not in qualifiers:
            qualifiers["color"] = colors_dict[role]
        # The same color is duplicated under the ApEinfo_* qualifier names.
        color = qualifiers["color"]
        qualifiers["ApEinfo_fwdcolor"] = color
        qualifiers["ApEinfo_revcolor"] = color
        return SeqFeature(
            self.location.to_biopython_location(),
            type=feature_type,
            qualifiers=qualifiers,
        )

    def restrict_nucleotides(self, sequence, location=None):
        """Restrict the mutation space to speed up optimization.

        This method only kicks in when this specification is used as a
        constraint. By default it does nothing, but subclasses such as
        EnforceTranslation, AvoidChanges, EnforceSequence, etc. have custom
        methods.

        In the code, this method is run during the initialize() step of
        DNAOptimizationProblem, when the MutationSpace is created for each
        constraint
        """
        return []

    def as_passive_objective(self):
        """Return a copy with optimize_passively set to true.

        "Optimize passively" means that when the specification is used as an
        objective, the solver will not do a specific pass to optimize this
        specification, however this specification's score will be taken into
        account in the global score when optimizing other objectives, and may
        therefore influence the final sequence.
        """
        return self.copy_with_changes(optimize_passively=True)

    def _copy_with_full_span_if_no_location(self, problem):
        """Return either self, or a copy with location "everywhere".

        And by "everywhere" we mean Location(0, L) where L is the problem's
        sequence length. Most Specifications use this method in their
        "initialized_on_problem()" custom method.
        """
        # A specification without any ``location`` attribute is handled like
        # one whose location is None.
        if getattr(self, "location", None) is not None:
            return self
        location = Location(0, len(problem.sequence), 1)
        return self.copy_with_changes(location=location)
class SpecificationsSet:
    """Generic class for writing Specs which are actually made of more specs.

    Behaves as a Specification when it comes to instantiation, reading it
    from annotated records, etc. but the initialization actually creates a
    dictionary of standard Specifications in the DNAOptimizationProblem
    """

    def register_specifications(self, specifications):
        """Attach a dict of named sub-specifications to this set.

        Each value of ``specifications`` gets a ``parent_specification``
        attribute pointing to this set and a ``name_in_parent`` attribute
        holding its key. The dict itself is then stored, as is, in
        ``self.specifications``.
        """
        for child_name in specifications:
            child = specifications[child_name]
            setattr(child, "parent_specification", self)
            setattr(child, "name_in_parent", child_name)
        self.specifications = specifications