from copy import deepcopy
# Table of every supported residue symbol, one row per symbol, laid out as
# (long name, medium name, short name).
# Row order is significant: the range and index constants defined after the
# table refer to row positions in it.
AA_NAMES = (
    # rows 0-19: the twenty basic amino acids
    ("alanine", "ala", "A"),
    ("cysteine", "cys", "C"),
    ("aspartate", "asp", "D"),
    ("glutamate", "glu", "E"),
    ("phenylalanine", "phe", "F"),
    ("glycine", "gly", "G"),
    ("histidine", "his", "H"),
    ("isoleucine", "ile", "I"),
    ("lysine", "lys", "K"),
    ("leucine", "leu", "L"),
    ("methionine", "met", "M"),
    ("asparagine", "asn", "N"),
    ("proline", "pro", "P"),
    ("glutamine", "gln", "Q"),
    ("arginine", "arg", "R"),
    ("serine", "ser", "S"),
    ("threonine", "thr", "T"),
    ("valine", "val", "V"),
    ("tryptophan", "trp", "W"),
    ("tyrosine", "tyr", "Y"),
    # rows 20-21: additional amino acids
    ("selenocysteine", "sec", "U"),
    ("pyrrolysine", "pyl", "O"),
    # rows 22-25: ambiguous / undetermined placeholders
    ("asparagine/aspartate", "asx", "B"),
    ("glutamine/glutamate", "glx", "Z"),
    ("leucine/isoleucine", "xle", "J"),
    ("undetermined", "xaa", "X"),
    # rows 26-27: gap and termination markers
    ("gap", "gap", "-"),
    ("termination", "term", "|"),
)

# (start, stop) row slices into AA_NAMES; stop is excluded.
AA_NAMES_CLASSIC_RANGE = (0, 20)   # the twenty basic amino acids only
AA_NAMES_EXTENDED_RANGE = (0, 26)  # everything except gap and termination

# Row positions of the two special markers.
AA_NAMES_GAP_INDEX = 26
AA_NAMES_TERM_INDEX = 27
class AminoAcid:
    """
    Represents one of the amino acids that can be found in genetic sequences.

    Can be one of the following :
    - any of the twenty amino acids
    - any of four combinations of possible amino acids
    - selenocysteine, pyrrolysine, a gap or termination codon

    The full list of possible amino acids is defined by AA_NAMES. An instance
    only stores the row index of its amino acid in AA_NAMES (attribute _id).
    """

    # Dictionary mapping every name (long, medium and short) to its row index in AA_NAMES
    _nameDict = {name: index for index, names in enumerate(AA_NAMES) for name in names}
    _nameModes = {"long": 0, "medium": 1, "short": 2}  # choices for name length
    _defaultNameMode = "short"  # short name by default

    def __init__(self, aminoAcid):
        """
        Creates an AminoAcid object representing one of the possible amino acids.

        @param aminoAcid can be the name of an amino acid, or an AminoAcid object (in which case a copy is created).
        @raise TypeError if aminoAcid is neither a string nor an AminoAcid object.
        @raise ValueError if aminoAcid is a string that is not a known amino acid name.
        """
        self._id = None  # id of the amino acid within the name group
        if isinstance(aminoAcid, str):
            if len(aminoAcid) == 1:
                # one-character names are stored upper-case in AA_NAMES
                self._id = self.__getIdByName(aminoAcid.upper())
            else:
                # long and medium names are stored lower-case in AA_NAMES
                self._id = self.__getIdByName(aminoAcid.lower())
        elif isinstance(aminoAcid, AminoAcid):
            self._id = aminoAcid._id  # copy of id
        else:
            raise TypeError("aminoAcid must be a string or an AminoAcid object")

    @staticmethod
    def __getIdByName(name):
        """
        Returns the row index in AA_NAMES of the amino acid called name.

        @raise ValueError if name is not a known amino acid name.
        """
        try:
            return AminoAcid._nameDict[name]
        except KeyError:
            # "from None": the internal KeyError carries no useful information for the caller
            raise ValueError("Could not find amino acid name {}".format(name)) from None

    @staticmethod
    def __getNameModeIndex(nameMode):
        """
        Returns the column index in an AA_NAMES row matching nameMode.

        @raise TypeError if nameMode is not 'short', 'medium' or 'long'.
        """
        try:
            return AminoAcid._nameModes[nameMode]
        except (KeyError, TypeError):
            # KeyError: unknown mode; TypeError: unhashable nameMode (e.g. a list).
            # TypeError is kept as the raised type for backward compatibility.
            raise TypeError("nameMode must be 'short', 'medium' or 'long'") from None

    @staticmethod
    def getNames(nameMode=_defaultNameMode):
        """Yields the names of the 20 basic amino acids."""
        start, stop = AA_NAMES_CLASSIC_RANGE
        yield from AminoAcid.getNamesInRange(start, stop, nameMode)

    @staticmethod
    def getAllNames(nameMode=_defaultNameMode):
        """Yields the names of all represented amino acids, excepting gaps and termination codons."""
        start, stop = AA_NAMES_EXTENDED_RANGE
        yield from AminoAcid.getNamesInRange(start, stop, nameMode)

    @staticmethod
    def getNamesInRange(startIndex, stopIndex, nameMode=_defaultNameMode):
        """
        Yields the names of amino acids in AA_NAMES, from startIndex to stopIndex (excluded).

        This is a generator: an invalid nameMode raises TypeError when iteration starts,
        not when the method is called.
        """
        nameModeIndex = AminoAcid.__getNameModeIndex(nameMode)
        for aa in AA_NAMES[startIndex:stopIndex]:
            yield aa[nameModeIndex]

    def isGap(self):
        """True if this amino acid is a gap, false otherwise."""
        return self._id == AA_NAMES_GAP_INDEX

    def isTermination(self):
        """True if this amino acid is a termination codon, false otherwise."""
        return self._id == AA_NAMES_TERM_INDEX

    def getName(self, nameMode=_defaultNameMode):
        """
        Returns the name of this amino acid.

        @param nameMode 'short' (default), 'medium' or 'long'.
        @raise TypeError if nameMode is not one of the accepted values.
        """
        nameIndex = AminoAcid.__getNameModeIndex(nameMode)
        return AA_NAMES[self._id][nameIndex]

    def __repr__(self):
        """Equivalent to getName()"""
        return self.getName()

    def __str__(self):
        """Equivalent to getName()."""
        return self.getName()  # default name mode

    # Comparison and hashing allow to manipulate and sort instances more efficiently
    # these functions do not have any biological meaning and their results may change over time.
    # Each comparison returns NotImplemented for operands that are not AminoAcid objects,
    # so that e.g. `aa == "A"` evaluates to False instead of raising AttributeError.
    def __eq__(self, other):
        if not isinstance(other, AminoAcid):
            return NotImplemented
        return self._id == other._id

    def __ne__(self, other):
        if not isinstance(other, AminoAcid):
            return NotImplemented
        return self._id != other._id

    def __gt__(self, other):
        if not isinstance(other, AminoAcid):
            return NotImplemented
        return self._id > other._id

    def __ge__(self, other):
        if not isinstance(other, AminoAcid):
            return NotImplemented
        return self._id >= other._id

    def __lt__(self, other):
        if not isinstance(other, AminoAcid):
            return NotImplemented
        return self._id < other._id

    def __le__(self, other):
        if not isinstance(other, AminoAcid):
            return NotImplemented
        return self._id <= other._id

    def __hash__(self):
        # consistent with __eq__: equal instances share the same _id
        return hash(self._id)