- 微信
- 微博
  
  分享文章到微博
- 复制链接
  
  复制链接到剪贴板

Python：计算两个蛋白或小分子之间的RMSD

DrugAI 发表于 2021/07/15 03:50:38 2021/07/15

【摘要】 Python脚本：计算两个蛋白或小分子之间的RMSD 用法： python rmsd.py protein1.pdb protein2.pdb   rmsd.py   # Root-mean-square deviation (RMSD) for proteins and/or ligands# in PDB files.# class Pdb...

Python脚本：计算两个蛋白或小分子之间的RMSD

用法：

python rmsd.py protein1.pdb protein2.pdb

rmsd.py


  
   
    
     
    
    
     
      # Root-mean-square deviation (RMSD) for proteins and/or ligands
     
    
   
    
     
    
    
     
      # in PDB files.
     
    
   
    
     
    
    
     
      #
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      class Pdb(object):
     
    
   
    
     
    
    
      """ Object that allows operations with protein files in PDB format. """
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      def __init__(self, file_cont = [], pdb_code = ""):
     
    
   
    
     
    
    
     
       self.cont = []
     
    
   
    
     
    
    
     
       self.atom = []
     
    
   
    
     
    
    
     
       self.hetatm = []
     
    
   
    
     
    
    
     
       self.fileloc = ""
     
    
   
    
     
    
    
      if isinstance(file_cont, list):
     
    
   
    
     
    
    
     
       self.cont = file_cont[:]
     
    
   
    
     
    
    
      elif isinstance(file_cont, str):
     
    
   
    
     
    
    
      try:
     
    
   
    
     
    
    
      with open(file_cont, 'r') as pdb_file:
     
    
   
    
     
    
    
     
       self.cont = [row.strip() for row in pdb_file.read().split('\n') if row.strip()]
     
    
   
    
     
    
    
      except FileNotFoundError as err:
     
    
   
    
     
    
    
     
       print(err)
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      if self.cont:
     
    
   
    
     
    
    
     
       self.atom = [row for row in self.cont if row.startswith('ATOM')]
     
    
   
    
     
    
    
     
       self.hetatm = [row for row in self.cont if row.startswith('HETATM')]
     
    
   
    
     
    
    
     
       self.conect = [row for row in self.cont if row.startswith('CONECT')]
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      def rmsd(self, sec_molecule, ligand=False, atoms="no_h"):
     
    
   
    
     
    
    
      """
     
    
   
    
     
    
    
     
       Calculates the Root Mean Square Deviation (RMSD) between two
     
    
   
    
     
    
    
     
       protein or ligand molecules in PDB format.
     
    
   
    
     
    
    
     
       Requires that both molecules have the same number of atoms in the
     
    
   
    
     
    
    
     
       same numerical order.
     
    
   
    
     
    
    
     
       Keyword arguments:
     
    
   
    
     
    
    
     
       sec_molecule (PdbObj): the second molecule as PdbObj object.
     
    
   
    
     
    
    
     
       ligand (bool): If true, calculates the RMSD between two
     
    
   
    
     
    
    
     
       ligand molecules (based on HETATM entries), else RMSD
     
    
   
    
     
    
    
     
       between two protein molecules (ATOM entries) is calculated.
     
    
   
    
     
    
    
     
       hydrogen (bool): If True, hydrogen atoms will be included in the
     
    
   
    
     
    
    
     
       RMSD calculation.
     
    
   
    
     
    
    
     
       atoms (string) [all/c/no_h/ca]: "all" includes all atoms in the RMSD calculation,
     
    
   
    
     
    
    
     
       "c" only considers carbon atoms, "no_h" considers all but hydrogen atoms,
     
    
   
    
     
    
    
     
       and "ca" compares only C-alpha protein atoms.
     
    
   
    
     
    
    
     
       Returns:
     
    
   
    
     
    
    
     
       Calculated RMSD value as float or None if RMSD not be
     
    
   
    
     
    
    
     
       calculated.
     
    
   
    
     
    
    
     
       """
     
    
   
    
     
    
    
     
       rmsd = None
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      if not ligand:
     
    
   
    
     
    
    
     
       coords1, coords2 = self.atom, sec_molecule.atom
     
    
   
    
     
    
    
      else:
     
    
   
    
     
    
    
     
       coords1, coords2 = self.hetatm, sec_molecule.hetatm
     
    
   
    
     
    
    
      if atoms == "c":
     
    
   
    
     
    
    
     
       coords1 = [row for row in coords1 if row[77:].startswith('C')]
     
    
   
    
     
    
    
     
       coords2 = [row for row in coords2 if row[77:].startswith('C')]
     
    
   
    
     
    
    
      elif atoms == "no_h":
     
    
   
    
     
    
    
     
       coords1 = [row for row in coords1 if not row[77:].startswith('H')]
     
    
   
    
     
    
    
     
       coords2 = [row for row in coords2 if not row[77:].startswith('H')]
     
    
   
    
     
    
    
      elif atoms == "ca":
     
    
   
    
     
    
    
     
       coords1 = self.calpha()
     
    
   
    
     
    
    
     
       coords2 = sec_molecule.calpha()
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      if all((coords1, coords2, len(coords1) == len(coords2))):
     
    
   
    
     
    
    
     
       total = 0
     
    
   
    
     
    
    
      for (i, j) in zip(coords1, coords2):
     
    
   
    
     
    
    
     
       total += ( float(i[30:38]) - float(j[30:38]) )**2 +\
     
    
   
    
     
    
    
     
       ( float(i[38:46]) - float(j[38:46]) )**2 +\
     
    
   
    
     
    
    
     
       ( float(i[46:54]) - float(j[46:54]) )**2 
     
    
   
    
     
    
    
     
       rmsd = round(( total / len(coords1) )**0.5, 4)
     
    
   
    
     
    
    
      return rmsd
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      if __name__ == '__main__':
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      import argparse
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
       parser = argparse.ArgumentParser(
     
    
   
    
     
    
    
     
       description='The RMSD measures the average distance between atoms \n'\
     
    
   
    
     
    
    
      'of 2 protein or ligand structures.\n'\
     
    
   
    
     
    
    
      'By default, all atoms but hydrogen atoms of the protein are included in the RMSD calculation.\n'\
     
    
   
    
     
    
    
      'NOTE: Both structures must contain the same number of atoms in similar order.',
     
    
   
    
     
    
    
     
       epilog='Example:\n'\
     
    
   
    
     
    
    
      'rmsd.py ~/Desktop/pdb1.pdb ~/Desktop/pdb2.pdb\n'\
     
    
   
    
     
    
    
      '0.7377',
     
    
   
    
     
    
    
     
       formatter_class=argparse.RawTextHelpFormatter
     
    
   
    
     
    
    
     
       )
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
       parser.add_argument('PDBfile1')
     
    
   
    
     
    
    
     
       parser.add_argument('PDBfile2')
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
       parser.add_argument('-l', '--ligand', action='store_true', help='Calculates RMSD between ligand (HETATM) atoms.')
     
    
   
    
     
    
    
     
       parser.add_argument('-c', '--carbon', action='store_true', help='Calculates the RMSD between carbon atoms only.')
     
    
   
    
     
    
    
     
       parser.add_argument('-ca', '--calpha', action='store_true', help='Calculates the RMSD between alpha-carbon atoms only.')
     
    
   
    
     
    
    
     
       parser.add_argument('-v', '--version', action='version', version='rmsd v. 1.0')
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
       args = parser.parse_args()
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
       pdb1 = Pdb(args.PDBfile1)
     
    
   
    
     
    
    
     
       pdb2 = Pdb(args.PDBfile2)
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      if args.carbon and args.calpha:
     
    
   
    
     
    
    
     
       print('\nERROR: Please provide EITHER -c OR -ca, not both.\n')
     
    
   
    
     
    
    
     
       parser.print_help()
     
    
   
    
     
    
    
     
       quit()
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      if args.ligand and args.carbon:
     
    
   
    
     
    
    
     
       print(pdb1.rmsd(sec_molecule=pdb2, ligand=True, atoms="c"))
     
    
   
    
     
    
    
      elif args.ligand and args.calpha:
     
    
   
    
     
    
    
     
       print(pdb1.calpha())
     
    
   
    
     
    
    
     
       print(pdb1.rmsd(sec_molecule=pdb2, ligand=True, atoms="ca"))
     
    
   
    
     
    
    
      elif args.ligand:
     
    
   
    
     
    
    
     
       print(pdb1.rmsd(sec_molecule=pdb2, ligand=True, atoms="no_h"))
     
    
   
    
     
    
    
      elif args.calpha:
     
    
   
    
     
    
    
     
       print(pdb1.rmsd(sec_molecule=pdb2, ligand=False, atoms="ca"))
     
    
   
    
     
    
    
      elif args.carbon:
     
    
   
    
     
    
    
     
       print(pdb1.rmsd(sec_molecule=pdb2, ligand=False, atoms="c"))
     
    
   
    
     
    
    
      else:
     
    
   
    
     
    
    
     
       print(pdb1.rmsd(sec_molecule=pdb2, ligand=False, atoms="no_h"))

文章来源: drugai.blog.csdn.net，作者：DrugAI，版权归原作者所有，如需转载，请联系作者。

原文链接：drugai.blog.csdn.net/article/details/81008659

点赞
收藏
关注作者

0/1000

抱歉，系统识别当前为高风险访问，暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称，即可参与社区互动！

*长度不超过10个汉字或20个英文字符，设置后3个月内不可修改。

确认取消

加入云驻计划，成为创作者

华为云周边好礼
免费体验产品
特殊身份标识
线下官方门票
内部专家零距离
与10000+优质创作者共同成长

立即加入

Python：计算两个蛋白或小分子之间的RMSD

Python脚本：计算两个蛋白或小分子之间的RMSD

全部回复

设置昵称

关于作者

目录

加入云驻计划，成为创作者

Python：计算两个蛋白或小分子之间的RMSD

Python脚本：计算两个蛋白或小分子之间的RMSD

全部回复

设置昵称

关于作者

目录

热门推荐查看更多

相关文章

加入云驻计划，成为创作者

相关产品