|
|
|
@@ -0,0 +1,686 @@ |
|
|
|
# ============================================================================= |
|
|
|
# Copyright 2022 HeliXon Limited |
|
|
|
# This file is adopted from DeepMind Technologies Limited. |
|
|
|
# |
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
|
# you may not use this file except in compliance with the License. |
|
|
|
# You may obtain a copy of the License at |
|
|
|
# |
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
|
|
# |
|
|
|
# Unless required by applicable law or agreed to in writing, software |
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
|
|
# See the License for the specific language governing permissions and |
|
|
|
# limitations under the License. |
|
|
|
# ============================================================================= |
|
|
|
"""Constants used in OmegaFold.""" |
|
|
|
import Bio.PDB |
|
|
|
import torch |
|
|
|
# Internal import (35fd). |
|
|
|
# Distance from one CA to next CA [trans configuration: omega = 180]. |
|
|
|
from Bio.Data import PDBData |
|
|
|
|
|
|
|
ca_ca = 3.80209737096 |
|
|
|
|
|
|
|
# Format: The list for each AA type contains chi1, chi2, chi3, chi4 in |
|
|
|
# this order (or a relevant subset from chi1 onwards). ALA and GLY don"t have |
|
|
|
# chi angles so their chi angle lists are empty. |
|
|
|
chi_angles_atoms = { |
|
|
|
'ALA': [], |
|
|
|
# Chi5 in arginine is always 0 +- 5 degrees, so ignore it. |
|
|
|
'ARG': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], |
|
|
|
['CB', 'CG', 'CD', 'NE'], ['CG', 'CD', 'NE', 'CZ']], |
|
|
|
'ASN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']], |
|
|
|
'ASP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']], |
|
|
|
'CYS': [['N', 'CA', 'CB', 'SG']], |
|
|
|
'GLN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], |
|
|
|
['CB', 'CG', 'CD', 'OE1']], |
|
|
|
'GLU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], |
|
|
|
['CB', 'CG', 'CD', 'OE1']], |
|
|
|
'GLY': [], |
|
|
|
'HIS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'ND1']], |
|
|
|
'ILE': [['N', 'CA', 'CB', 'CG1'], ['CA', 'CB', 'CG1', 'CD1']], |
|
|
|
'LEU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], |
|
|
|
'LYS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], |
|
|
|
['CB', 'CG', 'CD', 'CE'], ['CG', 'CD', 'CE', 'NZ']], |
|
|
|
'MET': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'SD'], |
|
|
|
['CB', 'CG', 'SD', 'CE']], |
|
|
|
'PHE': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], |
|
|
|
'PRO': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD']], |
|
|
|
'SER': [['N', 'CA', 'CB', 'OG']], |
|
|
|
'THR': [['N', 'CA', 'CB', 'OG1']], |
|
|
|
'TRP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], |
|
|
|
'TYR': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], |
|
|
|
'VAL': [['N', 'CA', 'CB', 'CG1']], |
|
|
|
} |
|
|
|
|
|
|
|
# If chi angles given in fixed-length array, this matrix determines how to |
|
|
|
# torsion_angles_mask them for each AA type. The order is as per |
|
|
|
# restype_order (see below). |
|
|
|
chi_angles_mask = torch.tensor([ |
|
|
|
[0.0, 0.0, 0.0, 0.0], # ALA |
|
|
|
[1.0, 1.0, 1.0, 1.0], # ARG |
|
|
|
[1.0, 1.0, 0.0, 0.0], # ASN |
|
|
|
[1.0, 1.0, 0.0, 0.0], # ASP |
|
|
|
[1.0, 0.0, 0.0, 0.0], # CYS |
|
|
|
[1.0, 1.0, 1.0, 0.0], # GLN |
|
|
|
[1.0, 1.0, 1.0, 0.0], # GLU |
|
|
|
[0.0, 0.0, 0.0, 0.0], # GLY |
|
|
|
[1.0, 1.0, 0.0, 0.0], # HIS |
|
|
|
[1.0, 1.0, 0.0, 0.0], # ILE |
|
|
|
[1.0, 1.0, 0.0, 0.0], # LEU |
|
|
|
[1.0, 1.0, 1.0, 1.0], # LYS |
|
|
|
[1.0, 1.0, 1.0, 0.0], # MET |
|
|
|
[1.0, 1.0, 0.0, 0.0], # PHE |
|
|
|
[1.0, 1.0, 0.0, 0.0], # PRO |
|
|
|
[1.0, 0.0, 0.0, 0.0], # SET |
|
|
|
[1.0, 0.0, 0.0, 0.0], # THR |
|
|
|
[1.0, 1.0, 0.0, 0.0], # TRP |
|
|
|
[1.0, 1.0, 0.0, 0.0], # TYR |
|
|
|
[1.0, 0.0, 0.0, 0.0], # VAL |
|
|
|
[0.0, 0.0, 0.0, 0.0], # UNK |
|
|
|
]) |
|
|
|
|
|
|
|
# Atoms positions relative to the 8 rigid groups, defined by the pre-omega, |
|
|
|
# phi, psi and chi angles: |
|
|
|
# 0: "backbone group", |
|
|
|
# 1: "pre-omega-group", (empty) |
|
|
|
# 2: "phi-group", (currently empty, because it defines only hydrogens) |
|
|
|
# 3: "psi-group", |
|
|
|
# 4,5,6,7: "chi1,2,3,4-group" |
|
|
|
# The atom positions are relative to the axis-end-atom of the corresponding |
|
|
|
# rotation axis. The x-axis is in direction of the rotation axis, and the |
|
|
|
# y-axis is defined such that the dihedral-angle-definiting atom (the last |
|
|
|
# entry in chi_angles_atoms above) is in the xy-plane (with a positive |
|
|
|
# y-coordinate). format: [atomname, group_idx, rel_position] |
|
|
|
aa_atom_positions = { |
|
|
|
'ALA': [ |
|
|
|
['N', 0, (-0.525, 1.363, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.526, -0.000, -0.000)], |
|
|
|
['CB', 0, (-0.529, -0.774, -1.205)], |
|
|
|
['O', 3, (0.627, 1.062, 0.000)], |
|
|
|
], |
|
|
|
'ARG': [ |
|
|
|
['N', 0, (-0.524, 1.362, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.525, -0.000, -0.000)], |
|
|
|
['CB', 0, (-0.524, -0.778, -1.209)], |
|
|
|
['O', 3, (0.626, 1.062, 0.000)], |
|
|
|
['CG', 4, (0.616, 1.390, -0.000)], |
|
|
|
['CD', 5, (0.564, 1.414, 0.000)], |
|
|
|
['NE', 6, (0.539, 1.357, -0.000)], |
|
|
|
['NH1', 7, (0.206, 2.301, 0.000)], |
|
|
|
['NH2', 7, (2.078, 0.978, -0.000)], |
|
|
|
['CZ', 7, (0.758, 1.093, -0.000)], |
|
|
|
], |
|
|
|
'ASN': [ |
|
|
|
['N', 0, (-0.536, 1.357, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.526, -0.000, -0.000)], |
|
|
|
['CB', 0, (-0.531, -0.787, -1.200)], |
|
|
|
['O', 3, (0.625, 1.062, 0.000)], |
|
|
|
['CG', 4, (0.584, 1.399, 0.000)], |
|
|
|
['ND2', 5, (0.593, -1.188, 0.001)], |
|
|
|
['OD1', 5, (0.633, 1.059, 0.000)], |
|
|
|
], |
|
|
|
'ASP': [ |
|
|
|
['N', 0, (-0.525, 1.362, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.527, 0.000, -0.000)], |
|
|
|
['CB', 0, (-0.526, -0.778, -1.208)], |
|
|
|
['O', 3, (0.626, 1.062, -0.000)], |
|
|
|
['CG', 4, (0.593, 1.398, -0.000)], |
|
|
|
['OD1', 5, (0.610, 1.091, 0.000)], |
|
|
|
['OD2', 5, (0.592, -1.101, -0.003)], |
|
|
|
], |
|
|
|
'CYS': [ |
|
|
|
['N', 0, (-0.522, 1.362, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.524, 0.000, 0.000)], |
|
|
|
['CB', 0, (-0.519, -0.773, -1.212)], |
|
|
|
['O', 3, (0.625, 1.062, -0.000)], |
|
|
|
['SG', 4, (0.728, 1.653, 0.000)], |
|
|
|
], |
|
|
|
'GLN': [ |
|
|
|
['N', 0, (-0.526, 1.361, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.526, 0.000, 0.000)], |
|
|
|
['CB', 0, (-0.525, -0.779, -1.207)], |
|
|
|
['O', 3, (0.626, 1.062, -0.000)], |
|
|
|
['CG', 4, (0.615, 1.393, 0.000)], |
|
|
|
['CD', 5, (0.587, 1.399, -0.000)], |
|
|
|
['NE2', 6, (0.593, -1.189, -0.001)], |
|
|
|
['OE1', 6, (0.634, 1.060, 0.000)], |
|
|
|
], |
|
|
|
'GLU': [ |
|
|
|
['N', 0, (-0.528, 1.361, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.526, -0.000, -0.000)], |
|
|
|
['CB', 0, (-0.526, -0.781, -1.207)], |
|
|
|
['O', 3, (0.626, 1.062, 0.000)], |
|
|
|
['CG', 4, (0.615, 1.392, 0.000)], |
|
|
|
['CD', 5, (0.600, 1.397, 0.000)], |
|
|
|
['OE1', 6, (0.607, 1.095, -0.000)], |
|
|
|
['OE2', 6, (0.589, -1.104, -0.001)], |
|
|
|
], |
|
|
|
'GLY': [ |
|
|
|
['N', 0, (-0.572, 1.337, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.517, -0.000, -0.000)], |
|
|
|
['O', 3, (0.626, 1.062, -0.000)], |
|
|
|
], |
|
|
|
'HIS': [ |
|
|
|
['N', 0, (-0.527, 1.360, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.525, 0.000, 0.000)], |
|
|
|
['CB', 0, (-0.525, -0.778, -1.208)], |
|
|
|
['O', 3, (0.625, 1.063, 0.000)], |
|
|
|
['CG', 4, (0.600, 1.370, -0.000)], |
|
|
|
['CD2', 5, (0.889, -1.021, 0.003)], |
|
|
|
['ND1', 5, (0.744, 1.160, -0.000)], |
|
|
|
['CE1', 5, (2.030, 0.851, 0.002)], |
|
|
|
['NE2', 5, (2.145, -0.466, 0.004)], |
|
|
|
], |
|
|
|
'ILE': [ |
|
|
|
['N', 0, (-0.493, 1.373, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.527, -0.000, -0.000)], |
|
|
|
['CB', 0, (-0.536, -0.793, -1.213)], |
|
|
|
['O', 3, (0.627, 1.062, -0.000)], |
|
|
|
['CG1', 4, (0.534, 1.437, -0.000)], |
|
|
|
['CG2', 4, (0.540, -0.785, -1.199)], |
|
|
|
['CD1', 5, (0.619, 1.391, 0.000)], |
|
|
|
], |
|
|
|
'LEU': [ |
|
|
|
['N', 0, (-0.520, 1.363, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.525, -0.000, -0.000)], |
|
|
|
['CB', 0, (-0.522, -0.773, -1.214)], |
|
|
|
['O', 3, (0.625, 1.063, -0.000)], |
|
|
|
['CG', 4, (0.678, 1.371, 0.000)], |
|
|
|
['CD1', 5, (0.530, 1.430, -0.000)], |
|
|
|
['CD2', 5, (0.535, -0.774, 1.200)], |
|
|
|
], |
|
|
|
'LYS': [ |
|
|
|
['N', 0, (-0.526, 1.362, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.526, 0.000, 0.000)], |
|
|
|
['CB', 0, (-0.524, -0.778, -1.208)], |
|
|
|
['O', 3, (0.626, 1.062, -0.000)], |
|
|
|
['CG', 4, (0.619, 1.390, 0.000)], |
|
|
|
['CD', 5, (0.559, 1.417, 0.000)], |
|
|
|
['CE', 6, (0.560, 1.416, 0.000)], |
|
|
|
['NZ', 7, (0.554, 1.387, 0.000)], |
|
|
|
], |
|
|
|
'MET': [ |
|
|
|
['N', 0, (-0.521, 1.364, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.525, 0.000, 0.000)], |
|
|
|
['CB', 0, (-0.523, -0.776, -1.210)], |
|
|
|
['O', 3, (0.625, 1.062, -0.000)], |
|
|
|
['CG', 4, (0.613, 1.391, -0.000)], |
|
|
|
['SD', 5, (0.703, 1.695, 0.000)], |
|
|
|
['CE', 6, (0.320, 1.786, -0.000)], |
|
|
|
], |
|
|
|
'PHE': [ |
|
|
|
['N', 0, (-0.518, 1.363, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.524, 0.000, -0.000)], |
|
|
|
['CB', 0, (-0.525, -0.776, -1.212)], |
|
|
|
['O', 3, (0.626, 1.062, -0.000)], |
|
|
|
['CG', 4, (0.607, 1.377, 0.000)], |
|
|
|
['CD1', 5, (0.709, 1.195, -0.000)], |
|
|
|
['CD2', 5, (0.706, -1.196, 0.000)], |
|
|
|
['CE1', 5, (2.102, 1.198, -0.000)], |
|
|
|
['CE2', 5, (2.098, -1.201, -0.000)], |
|
|
|
['CZ', 5, (2.794, -0.003, -0.001)], |
|
|
|
], |
|
|
|
'PRO': [ |
|
|
|
['N', 0, (-0.566, 1.351, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.527, -0.000, 0.000)], |
|
|
|
['CB', 0, (-0.546, -0.611, -1.293)], |
|
|
|
['O', 3, (0.621, 1.066, 0.000)], |
|
|
|
['CG', 4, (0.382, 1.445, 0.0)], |
|
|
|
# ["CD", 5, (0.427, 1.440, 0.0)], |
|
|
|
['CD', 5, (0.477, 1.424, 0.0)], # manually made angle 2 degrees larger |
|
|
|
], |
|
|
|
'SER': [ |
|
|
|
['N', 0, (-0.529, 1.360, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.525, -0.000, -0.000)], |
|
|
|
['CB', 0, (-0.518, -0.777, -1.211)], |
|
|
|
['O', 3, (0.626, 1.062, -0.000)], |
|
|
|
['OG', 4, (0.503, 1.325, 0.000)], |
|
|
|
], |
|
|
|
'THR': [ |
|
|
|
['N', 0, (-0.517, 1.364, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.526, 0.000, -0.000)], |
|
|
|
['CB', 0, (-0.516, -0.793, -1.215)], |
|
|
|
['O', 3, (0.626, 1.062, 0.000)], |
|
|
|
['CG2', 4, (0.550, -0.718, -1.228)], |
|
|
|
['OG1', 4, (0.472, 1.353, 0.000)], |
|
|
|
], |
|
|
|
'TRP': [ |
|
|
|
['N', 0, (-0.521, 1.363, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.525, -0.000, 0.000)], |
|
|
|
['CB', 0, (-0.523, -0.776, -1.212)], |
|
|
|
['O', 3, (0.627, 1.062, 0.000)], |
|
|
|
['CG', 4, (0.609, 1.370, -0.000)], |
|
|
|
['CD1', 5, (0.824, 1.091, 0.000)], |
|
|
|
['CD2', 5, (0.854, -1.148, -0.005)], |
|
|
|
['CE2', 5, (2.186, -0.678, -0.007)], |
|
|
|
['CE3', 5, (0.622, -2.530, -0.007)], |
|
|
|
['NE1', 5, (2.140, 0.690, -0.004)], |
|
|
|
['CH2', 5, (3.028, -2.890, -0.013)], |
|
|
|
['CZ2', 5, (3.283, -1.543, -0.011)], |
|
|
|
['CZ3', 5, (1.715, -3.389, -0.011)], |
|
|
|
], |
|
|
|
'TYR': [ |
|
|
|
['N', 0, (-0.522, 1.362, 0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.524, -0.000, -0.000)], |
|
|
|
['CB', 0, (-0.522, -0.776, -1.213)], |
|
|
|
['O', 3, (0.627, 1.062, -0.000)], |
|
|
|
['CG', 4, (0.607, 1.382, -0.000)], |
|
|
|
['CD1', 5, (0.716, 1.195, -0.000)], |
|
|
|
['CD2', 5, (0.713, -1.194, -0.001)], |
|
|
|
['CE1', 5, (2.107, 1.200, -0.002)], |
|
|
|
['CE2', 5, (2.104, -1.201, -0.003)], |
|
|
|
['OH', 5, (4.168, -0.002, -0.005)], |
|
|
|
['CZ', 5, (2.791, -0.001, -0.003)], |
|
|
|
], |
|
|
|
'VAL': [ |
|
|
|
['N', 0, (-0.494, 1.373, -0.000)], |
|
|
|
['CA', 0, (0.000, 0.000, 0.000)], |
|
|
|
['C', 0, (1.527, -0.000, -0.000)], |
|
|
|
['CB', 0, (-0.533, -0.795, -1.213)], |
|
|
|
['O', 3, (0.627, 1.062, -0.000)], |
|
|
|
['CG1', 4, (0.540, 1.429, -0.000)], |
|
|
|
['CG2', 4, (0.533, -0.776, 1.203)], |
|
|
|
], |
|
|
|
} |
|
|
|
|
|
|
|
for aa_k, aa_dict, in aa_atom_positions.items(): |
|
|
|
for i, v in enumerate(aa_dict): |
|
|
|
aa_dict[i][-1] = torch.tensor(v[-1]) |
|
|
|
aa_atom_positions[aa_k] = aa_dict |
|
|
|
|
|
|
|
# This mapping is used when we need to store atom data in a format that |
|
|
|
# requires fixed atom data size for every residue (e.g. a numpy array). |
|
|
|
atom_types = [ |
|
|
|
'N', 'CA', 'C', 'CB', 'O', 'CG', 'CG1', 'CG2', 'OG', 'OG1', 'SG', 'CD', |
|
|
|
'CD1', 'CD2', 'ND1', 'ND2', 'OD1', 'OD2', 'SD', 'CE', 'CE1', 'CE2', 'CE3', |
|
|
|
'NE', 'NE1', 'NE2', 'OE1', 'OE2', 'CH2', 'NH1', 'NH2', 'OH', 'CZ', 'CZ2', |
|
|
|
'CZ3', 'NZ', 'OXT' |
|
|
|
] |
|
|
|
atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)} |
|
|
|
atom_type_num = len(atom_types) # := 37. |
|
|
|
|
|
|
|
# A compact atom encoding with 14 columns |
|
|
|
# pylint: disable=line-too-long |
|
|
|
# pylint: disable=bad-whitespace |
|
|
|
restype_name_to_atom14_names = { |
|
|
|
'ALA': ['N', 'CA', 'C', 'O', 'CB', '', '', '', '', '', '', '', '', ''], |
|
|
|
'ARG': [ |
|
|
|
'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2', '', |
|
|
|
'', '' |
|
|
|
], |
|
|
|
'ASN': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'ND2', '', '', '', '', '', ''], |
|
|
|
'ASP': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'OD2', '', '', '', '', '', ''], |
|
|
|
'CYS': ['N', 'CA', 'C', 'O', 'CB', 'SG', '', '', '', '', '', '', '', ''], |
|
|
|
'GLN': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'NE2', '', '', '', '', ''], |
|
|
|
'GLU': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'OE2', '', '', '', '', ''], |
|
|
|
'GLY': ['N', 'CA', 'C', 'O', '', '', '', '', '', '', '', '', '', ''], |
|
|
|
'HIS': [ |
|
|
|
'N', 'CA', 'C', 'O', 'CB', 'CG', 'ND1', 'CD2', 'CE1', 'NE2', '', '', |
|
|
|
'', '' |
|
|
|
], |
|
|
|
'ILE': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', 'CD1', '', '', '', '', '', ''], |
|
|
|
'LEU': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', '', '', '', '', '', ''], |
|
|
|
'LYS': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', '', '', '', '', ''], |
|
|
|
'MET': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'CG', 'SD', 'CE', '', '', '', '', '', ''], |
|
|
|
'PHE': [ |
|
|
|
'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', '', |
|
|
|
'', '' |
|
|
|
], |
|
|
|
'PRO': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', '', '', '', '', '', '', ''], |
|
|
|
'SER': ['N', 'CA', 'C', 'O', 'CB', 'OG', '', '', '', '', '', '', '', ''], |
|
|
|
'THR': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'OG1', 'CG2', '', '', '', '', '', '', ''], |
|
|
|
'TRP': [ |
|
|
|
'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', |
|
|
|
'CZ2', 'CZ3', 'CH2' |
|
|
|
], |
|
|
|
'TYR': [ |
|
|
|
'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', |
|
|
|
'OH', '', '' |
|
|
|
], |
|
|
|
'VAL': |
|
|
|
['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', '', '', '', '', '', '', ''], |
|
|
|
'UNK': ['', '', '', '', '', '', '', '', '', '', '', '', '', ''], |
|
|
|
} |
|
|
|
# pylint: enable=line-too-long |
|
|
|
# pylint: enable=bad-whitespace |
|
|
|
|
|
|
|
# This is the standard residue order when coding AA type as a number. |
|
|
|
# Reproduce it by taking 3-letter AA codes and sorting them alphabetically. |
|
|
|
restypes = [ |
|
|
|
'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', |
|
|
|
'S', 'T', 'W', 'Y', 'V' |
|
|
|
] |
|
|
|
restype_order = {restype: i for i, restype in enumerate(restypes)} |
|
|
|
restype_num = len(restypes) # := 20. |
|
|
|
unk_restype_index = restype_num # Catch-all index for unknown restypes. |
|
|
|
|
|
|
|
restypes_with_x = restypes + ['X', '-'] |
|
|
|
restype_order_with_x = { |
|
|
|
restype: i |
|
|
|
for i, restype in enumerate(restypes_with_x) |
|
|
|
} |
|
|
|
restype_1to3 = { |
|
|
|
'A': 'ALA', |
|
|
|
'R': 'ARG', |
|
|
|
'N': 'ASN', |
|
|
|
'D': 'ASP', |
|
|
|
'C': 'CYS', |
|
|
|
'Q': 'GLN', |
|
|
|
'E': 'GLU', |
|
|
|
'G': 'GLY', |
|
|
|
'H': 'HIS', |
|
|
|
'I': 'ILE', |
|
|
|
'L': 'LEU', |
|
|
|
'K': 'LYS', |
|
|
|
'M': 'MET', |
|
|
|
'F': 'PHE', |
|
|
|
'P': 'PRO', |
|
|
|
'S': 'SER', |
|
|
|
'T': 'THR', |
|
|
|
'W': 'TRP', |
|
|
|
'Y': 'TYR', |
|
|
|
'V': 'VAL', |
|
|
|
'X': 'UNK' |
|
|
|
} |
|
|
|
|
|
|
|
# NB: restype_3to1 differs from Bio.PDB.protein_letters_3to1 by being a simple |
|
|
|
# 1-to-1 mapping of 3 letter names to one letter names. The latter contains |
|
|
|
# many more, and less common, three letter names as keys and maps many of these |
|
|
|
# to the same one letter name (including "X" and "U" which we don"t use here). |
|
|
|
restype_3to1 = {v: k for k, v in restype_1to3.items()} |
|
|
|
|
|
|
|
restype2atom_mask = torch.zeros([len(restypes_with_x), 14]) |
|
|
|
for k, v in restype_name_to_atom14_names.items(): |
|
|
|
for i, atom in enumerate(v): |
|
|
|
restype2atom_mask[restype_order_with_x[ |
|
|
|
restype_3to1[k]]][i] = len(atom) > 0 |
|
|
|
|
|
|
|
restype_rigidgroup_mask = torch.zeros([21, 8], dtype=torch.float) |
|
|
|
restype_rigidgroup_mask[:, 0] = 1 |
|
|
|
restype_rigidgroup_mask[:, 3] = 1 |
|
|
|
restype_rigidgroup_mask[:, 4:] = chi_angles_mask |
|
|
|
|
|
|
|
|
|
|
|
# Compute a mask whether the group exists. |
|
|
|
# (N, 8) |
|
|
|
def residx_to_3(idx): |
|
|
|
return restype_1to3[restypes[idx]] |
|
|
|
|
|
|
|
|
|
|
|
# Define a restype name for all unknown residues. |
|
|
|
unk_restype = 'UNK' |
|
|
|
|
|
|
|
resnames = [restype_1to3[r] for r in restypes] + [unk_restype] |
|
|
|
resname_to_idx = {resname: i for i, resname in enumerate(resnames)} |
|
|
|
|
|
|
|
|
|
|
|
def get_chi_angle_atom_indices(): |
|
|
|
"""Returns atom indices needed to compute chi angles for all residue types. |
|
|
|
|
|
|
|
Returns: |
|
|
|
A tensor of shape [residue_types=21, chis=4, atoms=4]. The residue |
|
|
|
types are in the order specified in residue_constants.restypes + |
|
|
|
unknown residue type at the end. For chi angles which are not defined |
|
|
|
on the residue, the positions indices are by default set to 0. |
|
|
|
""" |
|
|
|
chi_atom_indices = [] |
|
|
|
for residue_name in restypes: |
|
|
|
residue_name = restype_1to3[residue_name] |
|
|
|
residue_chi_angles = chi_angles_atoms[residue_name] |
|
|
|
atom_indices = [] |
|
|
|
for chi_angle in residue_chi_angles: |
|
|
|
atom_indices.append([atom_order[_atom] for _atom in chi_angle]) |
|
|
|
for _ in range(4 - len(atom_indices)): |
|
|
|
atom_indices.append([0, 0, 0, 0]) # For those not defined on AA. |
|
|
|
chi_atom_indices.append(atom_indices) |
|
|
|
|
|
|
|
chi_atom_indices.append([[0, 0, 0, 0]] * 4) # For UNKNOWN residue. |
|
|
|
|
|
|
|
return torch.tensor(chi_atom_indices) |
|
|
|
|
|
|
|
|
|
|
|
chi_angle_atom_indices = get_chi_angle_atom_indices() |
|
|
|
|
|
|
|
|
|
|
|
def _make_rigid_transformation_4x4(ex: torch.Tensor, ey: torch.Tensor, |
|
|
|
translation: torch.Tensor) -> torch.Tensor: |
|
|
|
"""Create a rigid 4x4 transformation matrix from two axes and transl.""" |
|
|
|
# Normalize ex. |
|
|
|
ex_normalized = ex / torch.linalg.norm(ex) |
|
|
|
|
|
|
|
# make ey perpendicular to ex |
|
|
|
ey_normalized = ey - torch.dot(ey, ex_normalized) * ex_normalized |
|
|
|
ey_normalized /= torch.linalg.norm(ey_normalized) |
|
|
|
|
|
|
|
# compute ez as cross product |
|
|
|
eznorm = torch.cross(ex_normalized, ey_normalized) |
|
|
|
m = torch.stack([ex_normalized, ey_normalized, eznorm, translation]).T |
|
|
|
m = torch.cat([m, torch.tensor([[0., 0., 0., 1.]])], dim=0) |
|
|
|
return m |
|
|
|
|
|
|
|
|
|
|
|
# create an array with (restype, atomtype) --> aa_idx |
|
|
|
# and an array with (restype, atomtype, coord) for the atom positions |
|
|
|
# and compute affine transformation matrices (4,4) from one rigid group to the |
|
|
|
# previous group |
|
|
|
restype_atom37_to_aa = torch.zeros([21, 37], dtype=torch.long) |
|
|
|
restype_atom37_mask = torch.zeros([21, 37], dtype=torch.float32) |
|
|
|
restype_atom37_aa_positions = torch.zeros([21, 37, 3], dtype=torch.float32) |
|
|
|
restype_atom14_to_aa = torch.zeros([21, 14], dtype=torch.long) |
|
|
|
restype_atom14_mask = torch.zeros([21, 14], dtype=torch.float32) |
|
|
|
restype_atom14_aa_positions = torch.zeros([21, 14, 3], dtype=torch.float32) |
|
|
|
restype_aa_default_frame = torch.zeros([21, 8, 4, 4], dtype=torch.float32) |
|
|
|
|
|
|
|
|
|
|
|
def _make_aa_constants(): |
|
|
|
"""Fill the arrays above.""" |
|
|
|
for restype, restype_letter in enumerate(restypes): |
|
|
|
resname = restype_1to3[restype_letter] |
|
|
|
for atomname, group_idx, atom_pos in aa_atom_positions[resname]: |
|
|
|
atomtype = atom_order[atomname] |
|
|
|
restype_atom37_to_aa[restype, atomtype] = group_idx |
|
|
|
restype_atom37_mask[restype, atomtype] = 1 |
|
|
|
restype_atom37_aa_positions[restype, atomtype, :] = atom_pos |
|
|
|
|
|
|
|
atom14idx = restype_name_to_atom14_names[resname].index(atomname) |
|
|
|
restype_atom14_to_aa[restype, atom14idx] = group_idx |
|
|
|
restype_atom14_mask[restype, atom14idx] = 1 |
|
|
|
restype_atom14_aa_positions[restype, atom14idx, :] = atom_pos |
|
|
|
|
|
|
|
for restype, restype_letter in enumerate(restypes): |
|
|
|
resname = restype_1to3[restype_letter] |
|
|
|
atom_positions = { |
|
|
|
name: pos |
|
|
|
for name, _, pos in aa_atom_positions[resname] |
|
|
|
} |
|
|
|
|
|
|
|
# backbone to backbone is the identity transforms |
|
|
|
restype_aa_default_frame[restype, 0, :, :] = torch.eye(4) |
|
|
|
|
|
|
|
# pre-omega-frame to backbone (currently dummy identity matrix) |
|
|
|
restype_aa_default_frame[restype, 1, :, :] = torch.eye(4) |
|
|
|
|
|
|
|
# phi-frame to backbone |
|
|
|
mat = _make_rigid_transformation_4x4(ex=atom_positions['N'] - |
|
|
|
atom_positions['CA'], |
|
|
|
ey=torch.tensor([1., 0., 0.]), |
|
|
|
translation=atom_positions['N']) |
|
|
|
restype_aa_default_frame[restype, 2, :, :] = mat |
|
|
|
|
|
|
|
# psi-frame to backbone |
|
|
|
mat = _make_rigid_transformation_4x4( |
|
|
|
ex=atom_positions['C'] - atom_positions['CA'], |
|
|
|
ey=atom_positions['CA'] - atom_positions['N'], |
|
|
|
translation=atom_positions['C']) |
|
|
|
restype_aa_default_frame[restype, 3, :, :] = mat |
|
|
|
|
|
|
|
# chi1-frame to backbone |
|
|
|
if chi_angles_mask[restype][0]: |
|
|
|
base_atom_names = chi_angles_atoms[resname][0] |
|
|
|
base_atom_positions = [ |
|
|
|
atom_positions[name] for name in base_atom_names |
|
|
|
] |
|
|
|
mat = _make_rigid_transformation_4x4( |
|
|
|
ex=base_atom_positions[2] - base_atom_positions[1], |
|
|
|
ey=base_atom_positions[0] - base_atom_positions[1], |
|
|
|
translation=base_atom_positions[2]) |
|
|
|
restype_aa_default_frame[restype, 4, :, :] = mat |
|
|
|
|
|
|
|
# chi2-frame to chi1-frame |
|
|
|
# chi3-frame to chi2-frame |
|
|
|
# chi4-frame to chi3-frame |
|
|
|
# luckily all rotation axes for the next frame start at (0,0,0) of the |
|
|
|
# previous frame |
|
|
|
for chi_idx in range(1, 4): |
|
|
|
if chi_angles_mask[restype][chi_idx]: |
|
|
|
axis_end_atom_name = chi_angles_atoms[resname][chi_idx][2] |
|
|
|
axis_end_atom_position = atom_positions[axis_end_atom_name] |
|
|
|
mat = _make_rigid_transformation_4x4( |
|
|
|
ex=axis_end_atom_position, |
|
|
|
ey=torch.tensor([-1., 0., 0.]), |
|
|
|
translation=axis_end_atom_position) |
|
|
|
restype_aa_default_frame[restype, 4 + chi_idx, :, :] = mat |
|
|
|
|
|
|
|
|
|
|
|
_make_aa_constants() |
|
|
|
"""Construct denser atom positions (14 dimensions instead of 37).""" |
|
|
|
restype_atom14_to_atom37 = [] # mapping (restype, atom14) --> atom37 |
|
|
|
restype_atom37_to_atom14 = [] # mapping (restype, atom37) --> atom14 |
|
|
|
|
|
|
|
for rt in restypes: |
|
|
|
atom_names = restype_name_to_atom14_names[restype_1to3[rt]] |
|
|
|
|
|
|
|
restype_atom14_to_atom37.append([(atom_order[name] if name else 0) |
|
|
|
for name in atom_names]) |
|
|
|
|
|
|
|
atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)} |
|
|
|
restype_atom37_to_atom14.append([ |
|
|
|
(atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0) |
|
|
|
for name in atom_types |
|
|
|
]) |
|
|
|
|
|
|
|
# Add dummy mapping for restype "UNK" |
|
|
|
restype_atom14_to_atom37.append([0] * 14) |
|
|
|
restype_atom37_to_atom14.append([0] * 37) |
|
|
|
|
|
|
|
restype_atom14_to_atom37 = torch.tensor(restype_atom14_to_atom37, |
|
|
|
dtype=torch.long) |
|
|
|
restype_atom37_to_atom14 = torch.tensor(restype_atom37_to_atom14, |
|
|
|
dtype=torch.long) |
|
|
|
chi_pi_periodic = torch.tensor([ |
|
|
|
[0.0, 0.0, 0.0, 0.0], # ALA |
|
|
|
[0.0, 0.0, 0.0, 0.0], # ARG |
|
|
|
[0.0, 0.0, 0.0, 0.0], # ASN |
|
|
|
[0.0, 1.0, 0.0, 0.0], # ASP |
|
|
|
[0.0, 0.0, 0.0, 0.0], # CYS |
|
|
|
[0.0, 0.0, 0.0, 0.0], # GLN |
|
|
|
[0.0, 0.0, 1.0, 0.0], # GLU |
|
|
|
[0.0, 0.0, 0.0, 0.0], # GLY |
|
|
|
[0.0, 0.0, 0.0, 0.0], # HIS |
|
|
|
[0.0, 0.0, 0.0, 0.0], # ILE |
|
|
|
[0.0, 0.0, 0.0, 0.0], # LEU |
|
|
|
[0.0, 0.0, 0.0, 0.0], # LYS |
|
|
|
[0.0, 0.0, 0.0, 0.0], # MET |
|
|
|
[0.0, 1.0, 0.0, 0.0], # PHE |
|
|
|
[0.0, 0.0, 0.0, 0.0], # PRO |
|
|
|
[0.0, 0.0, 0.0, 0.0], # SET |
|
|
|
[0.0, 0.0, 0.0, 0.0], # THR |
|
|
|
[0.0, 0.0, 0.0, 0.0], # TRP |
|
|
|
[0.0, 1.0, 0.0, 0.0], # TYR |
|
|
|
[0.0, 0.0, 0.0, 0.0], # VAL |
|
|
|
[0.0, 0.0, 0.0, 0.0], # UNK |
|
|
|
]) |
|
|
|
|
|
|
|
residue_atom_renaming_swaps = { |
|
|
|
'ASP': { |
|
|
|
'OD1': 'OD2' |
|
|
|
}, |
|
|
|
'GLU': { |
|
|
|
'OE1': 'OE2' |
|
|
|
}, |
|
|
|
'PHE': { |
|
|
|
'CD1': 'CD2', |
|
|
|
'CE1': 'CE2' |
|
|
|
}, |
|
|
|
'TYR': { |
|
|
|
'CD1': 'CD2', |
|
|
|
'CE1': 'CE2' |
|
|
|
}, |
|
|
|
} |
|
|
|
|
|
|
|
# Create an ambiguous atoms mask. shape: (21, 14). |
|
|
|
mask_ambiguous = torch.zeros((21, 14), dtype=torch.bool) |
|
|
|
for resname, swap in residue_atom_renaming_swaps.items(): |
|
|
|
for atom_name1, atom_name2 in swap.items(): |
|
|
|
restype = restype_order[restype_3to1[resname]] |
|
|
|
atom_idx1 = restype_name_to_atom14_names[resname].index(atom_name1) |
|
|
|
atom_idx2 = restype_name_to_atom14_names[resname].index(atom_name2) |
|
|
|
mask_ambiguous[restype, atom_idx1] = 1 |
|
|
|
mask_ambiguous[restype, atom_idx2] = 1 |
|
|
|
|
|
|
|
restype_3 = [restype_1to3[res] for res in restypes] |
|
|
|
restype_3 += ['UNK'] |
|
|
|
|
|
|
|
all_matrices = {res: torch.eye(14, dtype=torch.float32) for res in restype_3} |
|
|
|
for resname, swap in residue_atom_renaming_swaps.items(): |
|
|
|
correspondences = torch.arange(14) |
|
|
|
renaming_matrix = None |
|
|
|
for source_atom_swap, target_atom_swap in swap.items(): |
|
|
|
source_index = restype_name_to_atom14_names[resname].index( |
|
|
|
source_atom_swap) |
|
|
|
target_index = restype_name_to_atom14_names[resname].index( |
|
|
|
target_atom_swap) |
|
|
|
correspondences[source_index] = target_index |
|
|
|
correspondences[target_index] = source_index |
|
|
|
renaming_matrix = torch.zeros((14, 14), dtype=torch.float32) |
|
|
|
for index, correspondence in enumerate(correspondences): |
|
|
|
renaming_matrix[index, correspondence] = 1. |
|
|
|
all_matrices[resname] = renaming_matrix.to(torch.float32) |
|
|
|
renaming_matrices = torch.stack( |
|
|
|
[all_matrices[restype] for restype in restype_3], dim=0) |
|
|
|
|
|
|
|
|
|
|
|
def substitute(res: str): |
|
|
|
if Bio.PDB.is_aa(res): |
|
|
|
if res in resnames: |
|
|
|
return res |
|
|
|
else: |
|
|
|
res = PDBData.protein_letters_3to1[res] |
|
|
|
if res in restype_1to3.keys(): |
|
|
|
return restype_1to3[res] |
|
|
|
elif res == 'X': |
|
|
|
return 'UNK' |
|
|
|
else: |
|
|
|
# did not get anything that works |
|
|
|
return None |