Source code for pyprot.data.dssp

class DSSP:
    """Reader for a DSSP-style secondary-structure file.

    The file is scanned once. Lines before the column header (the first
    line whose first non-blank character is ``#``) are searched for the
    ``HEADER``, ``COMPND`` and ``SOURCE`` records; every non-blank line
    after the header is sliced into the fixed-width fields named in
    ``columns``.

    Attributes:
        columns: Names of the header columns that are extracted.
        residues: One ``[residue, aa, structure]`` list of raw (unstripped)
            field strings per data line, in file order.
        identifier: Second-to-last whitespace-separated token of the
            ``HEADER`` record, or ``""`` if none was found.
        protein: Value taken from the last ``COMPND`` record containing a
            ``:``, or ``""``.
        organism: Value taken from the last ``SOURCE`` record containing a
            ``:``, or ``""``.
    """

    # Collapses the one-letter structure codes into four classes
    # (H, E, T, C). A blank code maps to "C".
    _STRUCTURE_CLASSES = {
        "H": "H", "G": "H", "I": "H",
        "E": "E", "B": "E",
        "T": "T",
        "C": "C", "S": "C", " ": "C",
    }

    def __init__(self, filePath):
        """Parse the file at *filePath*.

        Args:
            filePath: Path of the file to read; opened in text mode.

        Raises:
            OSError: If the file cannot be opened.
        """
        # Interesting columns; their (start, end) offsets are derived from
        # the "#" header line while parsing.
        self.columns = ("RESIDUE", "AA", "STRUCTURE")
        self.residues = []
        # Metadata
        self.identifier = ""
        self.protein = ""
        self.organism = ""

        # Parsing
        columnIndex = {col: (0, 0) for col in self.columns}
        lineIsData = False
        with open(filePath, 'r') as dsspFile:
            # Iterate the handle directly instead of loading every line
            # into memory with readlines().
            for line in dsspFile:
                if not line.strip():
                    # Blank lines carry no record and no residue; indexing
                    # into them used to raise IndexError.
                    continue
                if lineIsData:
                    data = []
                    for column in self.columns:
                        start, end = columnIndex[column]
                        data.append(line[start:end])
                    self.residues.append(data)
                elif line.lstrip().startswith("#"):
                    lineIsData = True
                    columnIndex = self._locateColumns(line, self.columns)
                elif line.startswith("HEADER"):
                    fields = line.split()
                    if len(fields) >= 2:
                        self.identifier = fields[-2]
                elif line.startswith("COMPND"):
                    value = self._recordValue(line)
                    if value is not None:
                        self.protein = value
                elif line.startswith("SOURCE"):
                    value = self._recordValue(line)
                    if value is not None:
                        self.organism = value

    @staticmethod
    def _locateColumns(headerLine, columns):
        """Return ``{column: (start, end)}`` slice offsets from the header line.

        A column starts where its name starts and extends over the name plus
        the whitespace that follows it, minus one character (the separator
        before the next column).
        """
        offsets = {}
        for column in columns:
            start = headerLine.find(column)
            nameEnd = start + len(column)
            rest = headerLine[nameEnd:]
            padding = len(rest) - len(rest.lstrip())
            offsets[column] = (start, nameEnd + padding - 1)
        return offsets

    @staticmethod
    def _recordValue(line):
        """Return the text between the first ``:`` and the next ``;`` or ``:``.

        Runs of whitespace are collapsed to single spaces. Returns ``None``
        when the line contains no ``:`` so the caller can keep its current
        value instead of failing.
        """
        parts = line.split(":")
        if len(parts) < 2:
            return None
        return " ".join(parts[1].split(";")[0].split())

    def __repr__(self):
        """Return the parsed residues, one ``str(list)`` per line."""
        return "\n".join(str(values) for values in self.residues)

    def getSequenceStructure(self, chain):
        """Return the sequence and reduced structure string of one chain.

        Args:
            chain: Single-character chain identifier; compared with the last
                character of each residue's RESIDUE field.

        Returns:
            A ``(sequence, structure)`` tuple of equal-length strings. The
            sequence holds the first character of each matching AA field.
            The structure holds the class of each structure code; a missing
            code or one outside the known table is reported as ``"C"``.
        """
        sequence = []
        structure = []
        for residue in self.residues:
            residueField, aaField, structureField = residue[0], residue[1], residue[2]
            if not residueField or not aaField:
                # Truncated line: no chain or amino acid to read.
                continue
            if residueField[-1] != chain:
                continue
            sequence.append(aaField[0])
            code = structureField[0] if structureField else " "
            # Unknown codes fall back to "C", the same class as a blank
            # code, rather than aborting the whole chain with a KeyError.
            structure.append(self._STRUCTURE_CLASSES.get(code, "C"))
        return "".join(sequence), "".join(structure)