docs/Analysis/protocol_dashboard.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1e354d5",
   "metadata": {},
   "source": [
    "# Lab Protocol Dashboard\n",
    "\n",
    "This notebook provides an interactive dashboard to explore and manage both YAML protocols and Markdown protocols with YAML frontmatter.\n",
    "\n",
    "## Features\n",
    "- View all protocols in a searchable table\n",
    "- Filter by protocol type (YAML or Markdown)\n",
    "- Compare protocol structures\n",
    "- Visualize protocol statistics\n",
    "\n",
    "Let's start by importing the required libraries and setting up our environment."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c5f18cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install dependencies if not already installed\n",
    "import sys\n",
    "import subprocess\n",
    "\n",
    "def install_package(package):\n",
    "    try:\n",
    "        __import__(package)\n",
    "        print(f\"{package} is already installed\")\n",
    "    except ImportError:\n",
    "        print(f\"Installing {package}...\")\n",
    "        subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", package])\n",
    "        print(f\"{package} installed successfully\")\n",
    "\n",
    "# Install required packages\n",
    "install_package(\"pandas\")\n",
    "install_package(\"matplotlib\")\n",
    "install_package(\"ipywidgets\")\n",
    "install_package(\"pyyaml\")\n",
    "install_package(\"plotly\")\n",
    "\n",
    "print(\"\\nAll dependencies are installed and ready to use.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4c1f189",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import yaml\n",
    "import glob\n",
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "import matplotlib.pyplot as plt\n",
    "import ipywidgets as widgets\n",
    "from datetime import datetime\n",
    "from IPython.display import display, HTML, Markdown\n",
    "\n",
    "# Configure paths\n",
    "WORKSPACE_ROOT = \"/workspaces/docs\"\n",
    "PROTOCOLS_DIR = os.path.join(WORKSPACE_ROOT, \"Protocols\")\n",
    "\n",
    "print(f\"Workspace root: {WORKSPACE_ROOT}\")\n",
    "print(f\"Protocols directory: {PROTOCOLS_DIR}\")\n",
    "print(f\"Current working directory: {os.getcwd()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b384ad20",
   "metadata": {},
   "source": [
    "## Load Protocol Data\n",
    "\n",
    "Now we'll load all protocol data from both YAML files and Markdown files with YAML frontmatter."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_frontmatter(markdown_content):\n",
    "    \"\"\"Extract YAML frontmatter from markdown content\"\"\"\n",
    "    pattern = r\"^---\\n(.*?)\\n---\"\n",
    "    match = re.search(pattern, markdown_content, re.DOTALL)\n",
    "    if match:\n",
    "        try:\n",
    "            return yaml.safe_load(match.group(1))\n",
    "        except yaml.YAMLError:\n",
    "            return None\n",
    "    return None\n",
    "\n",
    "def load_protocol_files():\n",
    "    \"\"\"Load protocol data from both YAML and Markdown files\"\"\"\n",
    "    protocols = []\n",
    "    \n",
    "    # Process YAML files\n",
    "    yaml_files = glob.glob(os.path.join(PROTOCOLS_DIR, \"*.yaml\"))\n",
    "    for file_path in yaml_files:\n",
    "        try:\n",
    "            with open(file_path, 'r') as f:\n",
    "                data = yaml.safe_load(f)\n",
    "                if data:\n",
    "                    data['file_path'] = os.path.basename(file_path)\n",
    "                    data['file_type'] = 'yaml'\n",
    "                    protocols.append(data)\n",
    "        except Exception as e:\n",
    "            print(f\"Error reading {file_path}: {e}\")\n",
    "    \n",
    "    # Process Markdown files with frontmatter\n",
    "    md_files = glob.glob(os.path.join(PROTOCOLS_DIR, \"*.md\"))\n",
    "    for file_path in md_files:\n",
    "        try:\n",
    "            with open(file_path, 'r') as f:\n",
    "                content = f.read()\n",
    "                frontmatter = extract_frontmatter(content)\n",
    "                if frontmatter:\n",
    "                    frontmatter['file_path'] = os.path.basename(file_path)\n",
    "                    frontmatter['file_type'] = 'markdown'\n",
    "                    \n",
    "                    # Extract content preview (first 100 chars)\n",
    "                    content_without_frontmatter = re.sub(r\"^---\\n.*?\\n---\\n\", \"\", content, flags=re.DOTALL)\n",
    "                    preview = content_without_frontmatter.strip()[:100] + \"...\"\n",
    "                    frontmatter['content_preview'] = preview\n",
    "                    \n",
    "                    protocols.append(frontmatter)\n",
    "        except Exception as e:\n",
    "            print(f\"Error reading {file_path}: {e}\")\n",
    "    \n",
    "    return protocols\n",
    "\n",
    "# Load all protocols\n",
    "protocols = load_protocol_files()\n",
    "print(f\"Loaded {len(protocols)} protocols\")\n",
    "\n",
    "# Convert to DataFrame for easier manipulation\n",
    "df_protocols = pd.DataFrame(protocols)\n",
    "\n",
    "# Fill missing values with placeholders\n",
    "for col in ['id', 'name', 'version', 'description', 'author', 'created']:\n",
    "    if col not in df_protocols.columns:\n",
    "        df_protocols[col] = None\n",
    "\n",
    "# Preview the dataframe\n",
    "df_protocols[['file_path', 'file_type', 'id', 'name', 'version']].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Protocol Dashboard\n",
    "\n",
    "Let's create a dashboard to explore our protocols. We'll include:\n",
    "1. Summary statistics\n",
    "2. Interactive filtering\n",
    "3. Protocol details viewer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Summary statistics\n",
    "yaml_count = len(df_protocols[df_protocols['file_type'] == 'yaml'])\n",
    "md_count = len(df_protocols[df_protocols['file_type'] == 'markdown'])\n",
    "\n",
    "# Create a nice HTML summary\n",
    "summary_html = f\"\"\"\n",
    "<div style=\"background-color: #f5f5f5; padding: 15px; border-radius: 10px; margin-bottom: 20px;\">\n",
    "    <h2 style=\"margin-top: 0;\">Protocol Dashboard Summary</h2>\n",
    "    <p><strong>Generated:</strong> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>\n",
    "    <p><strong>Total Protocols:</strong> {len(df_protocols)}</p>\n",
    "    <ul>\n",
    "        <li><strong>YAML Files:</strong> {yaml_count}</li>\n",
    "        <li><strong>Markdown with Frontmatter:</strong> {md_count}</li>\n",
    "    </ul>\n",
    "</div>\n",
    "\"\"\"\n",
    "\n",
    "display(HTML(summary_html))\n",
    "\n",
    "# Create a pie chart of file types\n",
    "fig = px.pie(values=[yaml_count, md_count], \n",
    "             names=['YAML', 'Markdown'], \n",
    "             title='Protocol File Types',\n",
    "             color_discrete_sequence=['#636EFA', '#EF553B'])\n",
    "fig.update_layout(width=600, height=400)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Interactive Protocol Explorer\n",
    "\n",
    "Use the filters below to explore your protocols:"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}