def IdentifyIncompleteResidues(allAtomDetails): mol = Molecule() allAtoms = [] for details in allAtomDetails: iAtom = Atom() iAtom.set_attributes(details) allAtoms.append(iAtom) mol.SetAtoms(allAtoms) mol.IdentifyResidues() mol.IdentifyMissingAtomsInResidues() return mol def GetAtomsFromAtomDetails(allAtomDetails): allAtoms = [] for detail in allAtomDetails: tempAtom = GetAtom(detail) allAtoms.append(tempAtom) return allAtoms def GetAtom(detail): # STRUCTURE OF DETAIL IS DEFINED AS, [header,atomnum,atomname,altloc,resname,chain,resnum,icode,x,y,z,occupancy,bfactor,element,charge] newAtom = Atom() newAtom.set_attributes(detail) return newAtom def ReadPDB(fpath): allAtomDetails = [] ifs = open(fpath,'r') lines = ifs.readlines() for line in lines: #print 'line length',len(line.strip()) if len(line.strip()) == 0: # this is an empty line at the end of the PDB file, so ignore continue if line[:6] != 'ATOM ' and line[:6] != 'HETATM': #print 'WARNING!!! This function will only take ATOM or HETATM input. Ignoring following line:',line continue header = line[0:6] atomnum = line[6:11] atomname = line[12:16] altloc = line[16] resname = line[17:20] chain = line[22] resnum = line[22:26] icode = line[27] x = line[30:38] y = line[38:46] z = line[46:54] occupancy = line[54:60] bfactor = line[60:66] element = line[76:78] charge = line[78:80] allAtomDetails.append([header,atomnum,atomname,altloc,resname,chain,resnum,icode,x,y,z,occupancy,bfactor,element,charge]) #print [header,atomnum,atomname,altloc,resname,chain,resnum,icode,x,y,z,occupancy,bfactor,element,charge] print 'total number of atom details obtained is:',len(allAtomDetails) return allAtomDetails def GetProteinMolecule(allAtoms): mol = Molecule() mol.SetAtoms(allAtoms) mol.IdentifyResidues() mol.ExtractSequenceForMolecule() return mol class Atom: def __init__(self): self.header = '' self.atomnum = '' self.atomname = '' self.altloc = '' self.resname = '' self.chain = '' self.resnum = '' self.icode = '' self.x = '' self.y = '' self.z = '' self.occupancy = '' self.bfactor = '' self.element = '' self.charge = '' self.backbone = '' def set_attributes(self,details): header, atomnum,atomname,altloc,resname,chain,resnum,icode,x,y,z,occupancy,bfactor,element,charge = details self.header = header self.atomnum = atomnum self.atomname = atomname self.altloc = altloc self.resname = resname self.chain = chain self.resnum = resnum self.icode = icode self.x = x self.y = y self.z = z self.occupancy = occupancy self.bfactor = bfactor self.element = element self.charge = charge #def get_attributes(self): #return ''.join([self.header,self.atomnum,self.atomname,self.altloc,self.resname,self.chain,self.resnum,self.icode,self.x,self.y,self.z,self.occupancy,self.bfactor,self.element,self.charge]) def get_attributes(self): return [self.header,self.atomnum,self.atomname,self.altloc,self.resname,self.chain,self.resnum,self.icode,self.x,self.y,self.z,self.occupancy,self.bfactor,self.element,self.charge] RegularResidues = ['ALA','ARG','ASN','ASP','CYS','CYX','GLN','GLU','GLY','HIS','HSD','HSE','HSP','ILE','LEU','LYS','MET','PHE','PRO','SER','THR','TRP','TYR','VAL'] # Note: MSE has been added as a regular residue, which is not necessarily a good thing. Reconsider later. WarningMessage = 'WARNING!!!!\nAt this time, this function does not support non-standard amino acid residues\nor anything that is not an amino acid residue. Please check input.' def one_letter_code(resname): if len(resname) != 3 or resname not in RegularResidues: print WarningMessage res_acronyms = {'ALA':'A','ARG':'R','ASN':'N','ASP':'D','CYS':'C','CYX':'C','GLN':'Q','GLU':'E','GLY':'G','HIS':'H','HSD':'H','HSE':'H','HSP':'H','ILE':'I','LEU':'L','LYS':'K','MET':'M','PHE':'F','PRO':'P','SER':'S','THR':'T','TRP':'W','TYR':'Y','VAL':'V'} # We are taking MSE to be simply MET here, which again may not be correct. Reconsider later. return res_acronyms[resname] def three_letter_code(resname): if len(resname) != 3 or resname not in RegularResidues: print WarningMessage res_acronyms = {'ALA':'ALA','ARG':'ARG','ASN':'ASN','ASP':'ASP','CYS':'CYS','CYX':'CYX','GLN':'GLN','GLU':'GLU','GLY':'GLY','HIS':'HIS','HSD':'HSD','HSE':'HSE','HSP':'HSP','ILE':'ILE','LEU':'LEU','LYS':'LYS','MET':'MET','PHE':'PHE','PRO':'PRO','SER':'SER','THR':'THR','TRP':'TRP','TYR':'TYR','VAL':'VAL'} return res_acronyms[resname] class Residue: def __init__(self): self.resnum = '' self.resname = '' self.atoms = [] self.all_heavy_atoms_are_present = True def set_attributes(self,resnum,resname): self.resnum = resnum self.resname = resname def append_atom(self,atom): self.atoms.append(atom) def return_one_letter_code(self): return one_letter_code(self.resname) def return_three_letter_code(self): return three_letter_code(self.resname) def get_all_atomnames(self): atomList = [] for atom in self.atoms: atomList.append(atom.atomname.strip()) return atomList def mutate_to_ALA(self): self.determine_backbone_atoms() for atom in self.atoms: if not atom.backbone and atom.atomname.strip() == 'CB': self.atoms.pop(self.atoms.index(atom)) else: atom.resname.replace(atom.atomname.strip(),'ALA') self.resname.replace(self.resname.strip(),'ALA') print 'mutated:',self.resname,self.resnum,'to ALA' print 'the new residue now has',len(self.atoms),'atoms:' def determine_backbone_atoms(self): for atom in self.atoms: if atom.atomname in ['N','CA','C','O']: atom.backbone = True else: atom.backbone = False class ResidueHeavyAtoms: def __init__(self): self.HeavyAtomDictionary = {\ 'ALA':['N','CA','C','O','CB'],\ 'ARG':['N','CA','C','O','CB','CG','CD','NE','CZ','NH1','NH2'],\ 'ASP':['N','CA','C','O','CB','CG','OD1','OD2'],\ 'ASN':['N','CA','C','O','CB','CG','OD1','ND2'],\ 'CYS':['N','CA','C','O','CB','SG'],\ 'CYX':['N','CA','C','O','CB','SG'],\ 'GLU':['N','CA','C','O','CB','CG','CD','OE1','OE2'],\ 'GLN':['N','CA','C','O','CB','CG','CD','OE1','NE2'],\ 'GLY':['N','CA','C','O'],\ 'HIS':['N','CA','C','O','CB','CG','ND1','CD2','CE1','NE2'],\ 'HSP':['N','CA','C','O','CB','CG','ND1','CD2','CE1','NE2'],\ 'HSD':['N','CA','C','O','CB','CG','ND1','CD2','CE1','NE2'],\ 'HSE':['N','CA','C','O','CB','CG','ND1','CD2','CE1','NE2'],\ 'ILE':['N','CA','C','O','CB','CG1','CG2','CD1'],\ 'LEU':['N','CA','C','O','CB','CG','CD1','CD2'],\ 'LYS':['N','CA','C','O','CB','CG','CD','CE','NZ'],\ 'MET':['N','CA','C','O','CB','CG','SD','CE'],\ 'PHE':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ'],\ 'PRO':['N','CA','C','O','CB','CG','CD'],\ 'SER':['N','CA','C','O','CB','OG'],\ 'THR':['N','CA','C','O','CB','CG2','OG1'],\ 'TRP':['N','CA','C','O','CB','CG','CD1','CD2','CE2','CE3','CZ2','CZ3','CH2','NE1'],\ 'TYR':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ','OH'],\ 'VAL':['N','CA','C','O','CB','CG1','CG2'],\ 'MSE':['N','CA','C','O','CB','CG','SE','CE'],\ } def CheckIfRegularResidue( resname ): if resname not in RegularResidues: print 'Found non-standard residue:',resname return False else: return True def EnsureAllHeavyAtomsArePresent(res): heavyAtomList = res.get_all_atomnames() HeavyAtomDictionary = ResidueHeavyAtoms().HeavyAtomDictionary for heavyAtom in HeavyAtomDictionary[ res.resname.strip() ]: #print heavyAtom, res.resname.strip(), HeavyAtomDictionary[ res.resname.strip() ], heavyAtom not in heavyAtomList if heavyAtom not in heavyAtomList: res.all_heavy_atoms_are_present = False return res class Molecule: def __init__(self): self.atoms = '' self.sequence = '' self.residues = '' def write_pdb(self,ofs_name): ofs = open(ofs_name,'w') for atom in self.atoms: print atom.header+atom.atomnum+atom.atomname+atom.altloc+atom.resname+atom.chain+atom.resnum+atom.icode+atom.x+atom.y+atom.z+atom.occupancy+atom.bfactor+atom.element+atom.charge print atom.get_attributes() record = atom.get_attributes() ofs.write(record + '\n') ofs.close() print 'wrote pdb file:',ofs_name def SetAtoms(self,atoms): self.atoms = atoms def IdentifyResidues(self): allResidues = [] allResidueNumbers = [] for atom in self.atoms: #print [atom.header,atom.atomnum,atom.atomname,atom.altloc,atom.resname,atom.chain,atom.resnum,atom.icode,atom.x,atom.y,atom.z,atom.occupancy,atom.bfactor,atom.element,atom.charge] if atom.resnum not in allResidueNumbers: allResidueNumbers.append(atom.resnum) newResidue = Residue() newResidue.set_attributes(atom.resnum,atom.resname) newResidue.append_atom(atom) allResidues.append(newResidue) else: thisRes = '' for res in allResidues: if res.resnum == atom.resnum and res.resname == res.resname: thisRes = res break thisRes.append_atom(atom) self.residues = allResidues print 'total number of residues obtained is:',len(self.residues) def ExtractSequenceForMolecule(self): sequence = [] for residue in self.residues: sequence.append(residue.return_one_letter_code()) self.sequence = sequence def GetSequence(self): return self.sequence def IdentifyMissingAtomsInResidues(self): new_residues = [] for res in self.residues: newres = EnsureAllHeavyAtomsArePresent(res) #print newres.all_heavy_atoms_are_present new_residues.append(newres) self.residues = new_residues def MutateNonStandardResiduesToALA(self): for residue in self.residues: if not CheckIfRegularResidue(residue.resname): residue.mutate_to_ALA()