mirror of
https://github.com/the-jordan-lab/docs.git
synced 2025-05-09 21:32:38 +00:00
295 lines
11 KiB
Plaintext
295 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Histidine Interface Visualization\n",
|
|
"\n",
|
|
"This notebook visualizes histidine-mediated cation-\u03c0 and \u03c0-\u03c0 interactions in protein structures."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Import required packages\n",
|
|
"import py3Dmol\n",
|
|
"import os\n",
|
|
"import tempfile\n",
|
|
"from Bio import PDB\n",
|
|
"from IPython.display import HTML, display\n",
|
|
"import glob"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Constants for visualization\n",
|
|
"CHAIN_A_SURFACE = '#4e79a7' # Darker blue\n",
|
|
"CHAIN_A_STICK = '#85b0d5' # Lighter blue\n",
|
|
"CHAIN_A_LABEL = '#2c4e6f' # Dark blue for label text\n",
|
|
"\n",
|
|
"CHAIN_B_SURFACE = '#f2be2b' # Gold\n",
|
|
"CHAIN_B_STICK = '#f2be2b' # Same as surface\n",
|
|
"CHAIN_B_LABEL = '#8B4513' # Dark brown\n",
|
|
"\n",
|
|
"# Amino acid mapping\n",
|
|
"ONE_LETTER_MAP = {\n",
|
|
" 'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D',\n",
|
|
" 'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G',\n",
|
|
" 'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K',\n",
|
|
" 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',\n",
|
|
" 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'\n",
|
|
"}\n",
|
|
"\n",
|
|
"# Residue type definitions\n",
|
|
"CATION_RES = {'ARG', 'LYS', 'HIS'}\n",
|
|
"AROMATIC_RES = {'PHE', 'TYR', 'TRP', 'HIS'}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def convert_cif_to_pdb(cif_file):\n",
|
|
" \"\"\"Convert a CIF file to PDB format using BioPython.\"\"\"\n",
|
|
" try:\n",
|
|
" fd, temp_pdb = tempfile.mkstemp(suffix=\".pdb\")\n",
|
|
" os.close(fd)\n",
|
|
" parser = PDB.MMCIFParser(QUIET=True)\n",
|
|
" structure = parser.get_structure(\"structure\", cif_file)\n",
|
|
" io = PDB.PDBIO()\n",
|
|
" io.set_structure(structure)\n",
|
|
" io.save(temp_pdb)\n",
|
|
" return temp_pdb\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Error converting {cif_file} to PDB: {e}\")\n",
|
|
" return None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_sidechain_top_atom(residue):\n",
|
|
" \"\"\"Get the top atom of a residue's sidechain for visualization.\"\"\"\n",
|
|
" if residue.get_resname() == 'HIS':\n",
|
|
" return residue['CE1']\n",
|
|
" elif residue.get_resname() in {'PHE', 'TYR'}:\n",
|
|
" return residue['CZ']\n",
|
|
" elif residue.get_resname() == 'TRP':\n",
|
|
" return residue['CH2']\n",
|
|
" elif residue.get_resname() == 'ARG':\n",
|
|
" return residue['CZ']\n",
|
|
" elif residue.get_resname() == 'LYS':\n",
|
|
" return residue['NZ']\n",
|
|
" return None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def find_histidine_pairs(chain_a, chain_b, distance_cutoff=5.0):\n",
|
|
" \"\"\"Identify cation\u2013\u03c0 or \u03c0\u2013\u03c0 interactions with at least one HIS residue.\"\"\"\n",
|
|
" pairs = []\n",
|
|
" for residue_a in chain_a:\n",
|
|
" resn_a = residue_a.get_resname()\n",
|
|
" for residue_b in chain_b:\n",
|
|
" resn_b = residue_b.get_resname()\n",
|
|
" is_a_HIS = (resn_a == 'HIS')\n",
|
|
" is_b_HIS = (resn_b == 'HIS')\n",
|
|
" is_a_cation_or_aromatic = (resn_a in CATION_RES or resn_a in AROMATIC_RES)\n",
|
|
" is_b_cation_or_aromatic = (resn_b in CATION_RES or resn_b in AROMATIC_RES)\n",
|
|
"\n",
|
|
" if (is_a_HIS and is_b_cation_or_aromatic) or (is_b_HIS and is_a_cation_or_aromatic):\n",
|
|
" for atom_a in residue_a:\n",
|
|
" for atom_b in residue_b:\n",
|
|
" try:\n",
|
|
" if (atom_a - atom_b) < distance_cutoff:\n",
|
|
" if (is_a_HIS and resn_b in CATION_RES) or (is_b_HIS and resn_a in CATION_RES):\n",
|
|
" itype = '+:\u03c0' # cation\u2013\u03c0\n",
|
|
" else:\n",
|
|
" itype = '\u03c0:\u03c0' # \u03c0\u2013\u03c0\n",
|
|
" pairs.append((residue_a, residue_b, itype))\n",
|
|
" break\n",
|
|
" except Exception:\n",
|
|
" continue\n",
|
|
" else:\n",
|
|
" continue\n",
|
|
" break\n",
|
|
" return pairs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def create_viewer(pdb_data, viewer_type='ribbon', histidine_pairs=None):\n",
|
|
" \"\"\"Create a py3Dmol viewer with the specified visualization type.\"\"\"\n",
|
|
" viewer = py3Dmol.view(width=800, height=600)\n",
|
|
" viewer.addModel(pdb_data, \"pdb\")\n",
|
|
" \n",
|
|
" # Add surfaces\n",
|
|
" viewer.addSurface(py3Dmol.SAS, {'opacity': 0.6, 'color': CHAIN_A_SURFACE}, {'chain': 'A'})\n",
|
|
" viewer.addSurface(py3Dmol.SAS, {'opacity': 0.6, 'color': CHAIN_B_SURFACE}, {'chain': 'B'})\n",
|
|
" \n",
|
|
" if viewer_type == 'ribbon':\n",
|
|
" # Add ribbon view\n",
|
|
" viewer.setStyle({'chain': 'A'}, {'cartoon': {'color': CHAIN_A_SURFACE, 'opacity': 1.0}})\n",
|
|
" viewer.setStyle({'chain': 'B'}, {'cartoon': {'color': CHAIN_B_SURFACE, 'opacity': 1.0}})\n",
|
|
" else:\n",
|
|
" # Hide cartoon and show sticks for interacting residues\n",
|
|
" viewer.setStyle({'model': -1}, {'cartoon': {'hidden': True}})\n",
|
|
" \n",
|
|
" if histidine_pairs:\n",
|
|
" for resA, resB, itype in histidine_pairs:\n",
|
|
" chainA_id = resA.get_parent().id\n",
|
|
" chainB_id = resB.get_parent().id\n",
|
|
" resA_id = resA.get_id()[1]\n",
|
|
" resB_id = resB.get_id()[1]\n",
|
|
" \n",
|
|
" colorA = CHAIN_A_STICK if chainA_id == 'A' else CHAIN_B_STICK\n",
|
|
" colorB = CHAIN_A_STICK if chainB_id == 'A' else CHAIN_B_STICK\n",
|
|
" \n",
|
|
" viewer.setStyle({'chain': chainA_id, 'resi': resA_id}, \n",
|
|
" {'stick': {'color': colorA, 'radius': 0.3}})\n",
|
|
" viewer.setStyle({'chain': chainB_id, 'resi': resB_id}, \n",
|
|
" {'stick': {'color': colorB, 'radius': 0.3}})\n",
|
|
" \n",
|
|
" # Add dotted line between interacting residues\n",
|
|
" topA = get_sidechain_top_atom(resA)\n",
|
|
" topB = get_sidechain_top_atom(resB)\n",
|
|
" if topA and topB:\n",
|
|
" x1, y1, z1 = topA.coord\n",
|
|
" x2, y2, z2 = topB.coord\n",
|
|
" viewer.addLine({\n",
|
|
" 'start': {'x': float(x1), 'y': float(y1), 'z': float(z1)},\n",
|
|
" 'end': {'x': float(x2), 'y': float(y2), 'z': float(z2)},\n",
|
|
" 'color': 'blue',\n",
|
|
" 'linewidth': 4,\n",
|
|
" 'dashed': True,\n",
|
|
" 'dashLength': 0.4,\n",
|
|
" 'gapLength': 0.2\n",
|
|
" })\n",
|
|
" \n",
|
|
" viewer.zoomTo()\n",
|
|
" return viewer"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def visualize_structure(file_path):\n",
|
|
" \"\"\"Visualize a structure with both ribbon and labeled views.\"\"\"\n",
|
|
" # Handle CIF files\n",
|
|
" if file_path.lower().endswith('.cif'):\n",
|
|
" temp_pdb = convert_cif_to_pdb(file_path)\n",
|
|
" if not temp_pdb:\n",
|
|
" print(f\"Could not process CIF file: {file_path}\")\n",
|
|
" return\n",
|
|
" file_path = temp_pdb\n",
|
|
" \n",
|
|
" # Parse structure\n",
|
|
" parser = PDB.PDBParser(QUIET=True)\n",
|
|
" structure = parser.get_structure('model', file_path)\n",
|
|
" \n",
|
|
" try:\n",
|
|
" chain_a = structure[0]['A']\n",
|
|
" chain_b = structure[0]['B']\n",
|
|
" except KeyError:\n",
|
|
" print(f\"Could not find chain A or B in: {file_path}\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" # Find histidine pairs\n",
|
|
" histidine_pairs = find_histidine_pairs(chain_a, chain_b, distance_cutoff=5.0)\n",
|
|
" \n",
|
|
" # Read PDB data\n",
|
|
" with open(file_path, 'r') as fh:\n",
|
|
" pdb_data = fh.read()\n",
|
|
" \n",
|
|
" # Create viewers\n",
|
|
" ribbon_viewer = create_viewer(pdb_data, 'ribbon')\n",
|
|
" label_viewer = create_viewer(pdb_data, 'label', histidine_pairs)\n",
|
|
" \n",
|
|
" # Display viewers side by side\n",
|
|
" display(HTML(f\"<div style='display: flex; justify-content: space-between;'>\"))\n",
|
|
" display(HTML(\"<div style='width: 48%;'>\"))\n",
|
|
" ribbon_viewer.show()\n",
|
|
" display(HTML(\"</div>\"))\n",
|
|
" display(HTML(\"<div style='width: 48%;'>\"))\n",
|
|
" label_viewer.show()\n",
|
|
" display(HTML(\"</div>\"))\n",
|
|
" display(HTML(\"</div>\"))\n",
|
|
" \n",
|
|
" # Clean up temporary file if it was a CIF\n",
|
|
" if file_path.lower().endswith('.cif') and os.path.exists(temp_pdb):\n",
|
|
" os.remove(temp_pdb)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# List available PDB/CIF files\n",
|
|
"model_files = glob.glob('ndufs-7-acot-9-mm-af2-models/*.pdb') + \\\n",
|
|
" glob.glob('ndufs-7-acot-9-mm-af2-models/*.cif')\n",
|
|
"print(f\"Found {len(model_files)} model files:\")\n",
|
|
"for i, file in enumerate(model_files):\n",
|
|
" print(f\"{i+1}. {os.path.basename(file)}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Visualize each model\n",
|
|
"for i, file_path in enumerate(model_files):\n",
|
|
" print(f\"\\nProcessing model {i+1}: {os.path.basename(file_path)}\")\n",
|
|
" visualize_structure(file_path)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
} |