Mirror of https://github.com/the-jordan-lab/docs.git (synced 2025-05-09 21:32:38 +00:00)

Update project structure and add experiment files

This commit is contained in:
  parent 1ee288e07b
  commit 4ab492fffd
15  .cursor/prompt.md  Normal file
@@ -0,0 +1,15 @@
You are LabAgent v2.

When the user asks to create or update an experiment you MUST:

1. Gather: aim, project, plate id, full plate layout, sample-prep, downstream-app, researcher.
2. Call create_experiment_with_plate (or update) using the YAML schema above.
3. For every lab step (seed, transfect, prep, assay) call add_experiment_task → a GitHub Issue is created and appended under tasks: with id and status: open.
4. After each Issue is created, use the gh CLI to also add it to the GitHub Project "Lab Tasks Board".
5. Commit the new/updated YAML and data files. Commit message pattern:
   Experiment: <action> EXP-XXXX – <short title>
6. When the user runs lab record or drops a file into Data/EXP-XXXX/, call record_data to append data:.
7. On "finish experiment" requests:
   • verify that all tasks.status == closed and at least one data: entry exists;
   • set status: completed;
   • commit and update SESSION_LOG.md.
8. Never ask for confirmation when data can be auto-inferred (Smart-Fill); ask only when a required field is ambiguous.
9. Keep chat concise; rely on repository state as the single source of truth.
11  .devcontainer/Dockerfile.backup  Normal file
@@ -0,0 +1,11 @@
FROM mcr.microsoft.com/devcontainers/python:3.10

# Install system dependencies
RUN apt-get update && \
    apt-get install -y git curl && \
    rm -rf /var/lib/apt/lists/*

# Set up a working directory
WORKDIR /workspaces/lab-docs

# (Optional) Install additional tools or dependencies here
.devcontainer/devcontainer.json
@@ -13,7 +13,7 @@
   "containerEnv": {
     "OPENAI_API_KEY": "${localEnv:OPENAI_API_KEY}"
   },
-  "postCreateCommand": "pip install -r requirements.txt && chmod +x .devcontainer/jupyter-setup.sh && .devcontainer/jupyter-setup.sh && chmod +x .devcontainer/windsurf-auth-setup.sh",
+  "postCreateCommand": "pip install -r requirements.txt && pip install -e Agent && chmod +x .devcontainer/jupyter-setup.sh && .devcontainer/jupyter-setup.sh && chmod +x .devcontainer/windsurf-auth-setup.sh",
"postStartCommand": "bash .devcontainer/postStartCommand.sh && WINDSURF_AUTH_TOKEN='eyJhbGciOiJSUzI1NiIsImtpZCI6IjU5MWYxNWRlZTg0OTUzNjZjOTgyZTA1MTMzYmNhOGYyNDg5ZWFjNzIiLCJ0eXAiOiJKV1QifQ.eyJwaWN0dXJlIjoiaHR0cHM6Ly9hdmF0YXJzLmdpdGh1YnVzZXJjb250ZW50LmNvbS91LzE5NDMyOTQ4ND92PTQiLCJpc3MiOiJodHRwczovL3NlY3VyZXRva2VuLmdvb2dsZS5jb20vZXhhMi1mYjE3MCIsImF1ZCI6ImV4YTItZmIxNzAiLCJhdXRoX3RpbWUiOjE3NDY1NzA2ODAsInVzZXJfaWQiOiJNTVJZY3NVb3ZVaFNVc2RhMkhrd1J4UE5tTWQyIiwic3ViIjoiTU1SWWNzVW92VWhTVXNkYTJIa3dSeFBObU1kMiIsImlhdCI6MTc0NjU3MDY4MSwiZXhwIjoxNzQ2NTc0MjgxLCJlbWFpbCI6ImppbUBqb3JkYW5sYWIub3JnIiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJmaXJlYmFzZSI6eyJpZGVudGl0aWVzIjp7ImdpdGh1Yi5jb20iOlsiMTk0MzI5NDg0Il0sImVtYWlsIjpbImppbUBqb3JkYW5sYWIub3JnIl19LCJzaWduX2luX3Byb3ZpZGVyIjoiZ2l0aHViLmNvbSJ9fQ.kgY8pNERRK3d2weIc4eR0EI_JKfATIxE9b6ACKdQE1XVwU26_p07PGfvdWo4ty8oXjCdUdnUiprM1LKaT2yZPGnleOdeJtH31Ua1DNc7hNTEideMeTyZUAOXv6O1VJdXqpcRfjc5Q5JxEjJdj1cGdomFA1c_kn3VbGyL8BsAfH6Sg6q7fB4eRRQ5MlHPBDxQl7neHsdDVGhqGprRnWfJsOI0PJhsWC4jzSBM5HO3uFKOnl_9-BdGY_zN6j_uRcFXfHB3VxfSecinepjz3u5fdmEd71YpGJNRhGXYiM7pZBhETCvA9Ri-b1Jh74dsoAyPfmnDOdt0c0xBF2TmPEPgZA' .devcontainer/windsurf-auth-setup.sh && jupyter notebook --no-browser --ip=0.0.0.0 --port=8888 --NotebookApp.token='' --NotebookApp.password='' --allow-root &",
  "extensions": [
    "ms-python.python",
@@ -23,7 +23,10 @@
    "ms-toolsai.vscode-jupyter-cell-tags",
    "ms-toolsai.jupyter-keymap",
    "ms-toolsai.jupyter-renderers",
-   "windsurf-dev.windsurf"
+   "windsurf-dev.windsurf",
+   "github.vscode-pull-request-github",
+   "ms-vscode.github-issues-prs",
+   "gruntfuggly.todo-tree"
  ],
  "forwardPorts": [8888],
  "remoteUser": "vscode",
39  .devcontainer/devcontainer.json.backup  Normal file
@@ -0,0 +1,39 @@
{
  "name": "Lab Management Codespace",
  "build": {
    "dockerfile": "Dockerfile"
  },
  "settings": {
    "terminal.integrated.shell.linux": "/bin/bash",
    "python.defaultInterpreterPath": "/usr/local/bin/python",
    "jupyter.alwaysTrustNotebooks": true,
    "workbench.startupEditor": "none"
  },
  "postCreateCommand": "pip install -r requirements.txt && chmod +x .devcontainer/jupyter-setup.sh && .devcontainer/jupyter-setup.sh && chmod +x .devcontainer/windsurf-auth-setup.sh",
"postStartCommand": "bash .devcontainer/postStartCommand.sh && WINDSURF_AUTH_TOKEN='eyJhbGciOiJSUzI1NiIsImtpZCI6IjU5MWYxNWRlZTg0OTUzNjZjOTgyZTA1MTMzYmNhOGYyNDg5ZWFjNzIiLCJ0eXAiOiJKV1QifQ.eyJwaWN0dXJlIjoiaHR0cHM6Ly9hdmF0YXJzLmdpdGh1YnVzZXJjb250ZW50LmNvbS91LzE5NDMyOTQ4ND92PTQiLCJpc3MiOiJodHRwczovL3NlY3VyZXRva2VuLmdvb2dsZS5jb20vZXhhMi1mYjE3MCIsImF1ZCI6ImV4YTItZmIxNzAiLCJhdXRoX3RpbWUiOjE3NDY1NzA2ODAsInVzZXJfaWQiOiJNTVJZY3NVb3ZVaFNVc2RhMkhrd1J4UE5tTWQyIiwic3ViIjoiTU1SWWNzVW92VWhTVXNkYTJIa3dSeFBObU1kMiIsImlhdCI6MTc0NjU3MDY4MSwiZXhwIjoxNzQ2NTc0MjgxLCJlbWFpbCI6ImppbUBqb3JkYW5sYWIub3JnIiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJmaXJlYmFzZSI6eyJpZGVudGl0aWVzIjp7ImdpdGh1Yi5jb20iOlsiMTk0MzI5NDg0Il0sImVtYWlsIjpbImppbUBqb3JkYW5sYWIub3JnIl19LCJzaWduX2luX3Byb3ZpZGVyIjoiZ2l0aHViLmNvbSJ9fQ.kgY8pNERRK3d2weIc4eR0EI_JKfATIxE9b6ACKdQE1XVwU26_p07PGfvdWo4ty8oXjCdUdnUiprM1LKaT2yZPGnleOdeJtH31Ua1DNc7hNTEideMeTyZUAOXv6O1VJdXqpcRfjc5Q5JxEjJdj1cGdomFA1c_kn3VbGyL8BsAfH6Sg6q7fB4eRRQ5MlHPBDxQl7neHsdDVGhqGprRnWfJsOI0PJhsWC4jzSBM5HO3uFKOnl_9-BdGY_zN6j_uRcFXfHB3VxfSecinepjz3u5fdmEd71YpGJNRhGXYiM7pZBhETCvA9Ri-b1Jh74dsoAyPfmnDOdt0c0xBF2TmPEPgZA' .devcontainer/windsurf-auth-setup.sh && jupyter notebook --no-browser --ip=0.0.0.0 --port=8888 --NotebookApp.token='' --NotebookApp.password='' --allow-root &",
  "extensions": [
    "ms-python.python",
    "ms-azuretools.vscode-docker",
    "redhat.vscode-yaml",
    "ms-toolsai.jupyter",
    "ms-toolsai.vscode-jupyter-cell-tags",
    "ms-toolsai.jupyter-keymap",
    "ms-toolsai.jupyter-renderers",
    "windsurf-dev.windsurf"
  ],
  "forwardPorts": [8888],
  "remoteUser": "vscode",
  "features": {
    "github-cli": "latest"
  },
  "customizations": {
    "codespaces": {
      "openFiles": ["/workspaces/docs/Analysis/protocol_dashboard.ipynb"]
    },
    "vscode": {
      "settings": {
        "windsurf.authTokenPath": "~/.windsurf/auth.json"
      }
    }
  }
}
348  Agent/experiment_handler_patch.py  Normal file
@@ -0,0 +1,348 @@
"""
Code snippet to be added to agent_runner.py to handle creating experiments with
multiblock markdown templates. This would extend the existing functionality to
support the richer experiment format.
"""

# Imports this snippet relies on (assumed to already exist at the top of
# agent_runner.py, since these functions are meant to be pasted into it).
import os
import re
from datetime import datetime
from typing import Any, Dict


def handle_create_multiblock_experiment(self, args: Dict[str, Any]):
    """
    Handle creation of a new experiment using the multiblock markdown template.
    This creates a structured markdown file with multiple YAML frontmatter blocks
    and placeholder sections for data, analysis, and next steps.

    Args should include:
        title (str): Experiment title
        researchers (list): List of researchers involved
        protocol_id (str): Protocol ID (e.g., PROT-XXXX)
        protocol_name (str): Protocol name
        project (str): Project name
        aim (str): Brief description of experimental aim
        cell_lines (list): List of cell lines used
        plate_format (str): Plate format (e.g., 24-well)
        condition_map (str): Map of conditions in the plate
        additional_metadata (dict): Any additional metadata fields
    """
    # Load the multiblock template
    template_path = os.path.join("Templates", "experiment_multiblock.md")
    with open(template_path, "r") as f:
        template = f.read()

    # Generate an experiment ID if not provided, by incrementing the highest
    # existing EXP-#### number
    if not args.get("experiment_id"):
        existing_ids = [f for f in os.listdir("Experiments") if f.startswith("EXP-")]
        existing_nums = [int(f.split("-")[1].split("_")[0])
                         for f in existing_ids if re.match(r"EXP-\d+", f)]
        next_num = max(existing_nums) + 1 if existing_nums else 1
        args["experiment_id"] = f"EXP-{next_num:04d}"

    # Current date if not provided
    if not args.get("date"):
        args["date"] = datetime.now().strftime("%Y-%m-%d")

    # Fill template with args.
    # This is a simple placeholder - a real implementation would handle the
    # multiple frontmatter blocks more carefully.
    for key, value in args.items():
        if isinstance(value, (str, int, float)):
            template = template.replace(f"{{{{{key}}}}}", str(value))

    # Generate filename with experiment ID and title
    experiment_id = args.get("experiment_id")
    title = args.get("title", "untitled").lower().replace(" ", "-")
    filename = f"{experiment_id}-{title}.md"
    out_path = os.path.join("Experiments", filename)

    # Ensure a unique filename if one already exists
    i = 1
    while os.path.exists(out_path):
        filename = f"{experiment_id}-{title}-{i}.md"
        out_path = os.path.join("Experiments", filename)
        i += 1

    # Create data directories for the experiment
    data_dir = os.path.join("Data", experiment_id)
    os.makedirs(os.path.join(data_dir, "raw"), exist_ok=True)
    os.makedirs(os.path.join(data_dir, "figures"), exist_ok=True)

    # Create analysis script placeholder if needed
    analysis_dir = "Analysis"
    os.makedirs(analysis_dir, exist_ok=True)

    # Script path based on experiment type
    script_path = None
    if args.get("analysis_type") == "mRNA_stability":
        script_path = os.path.join(analysis_dir, f"{experiment_id}_mRNA_stability_analysis.R")
        # Here we would create a placeholder R script tailored to mRNA stability analysis
    elif args.get("analysis_type") == "qPCR":
        script_path = os.path.join(analysis_dir, f"{experiment_id}_qPCR_analysis.R")
        # Create a placeholder qPCR analysis script

    # Write the experiment file
    with open(out_path, "w") as f:
        f.write(template)

    # Log the action
    self.logger.info(f"Created multiblock experiment: {out_path}")
    self.logger.info(f"Created data directories: {data_dir}/raw and {data_dir}/figures")

    # Update user profiles. The cell-line loop is nested inside the researcher
    # loop so researcher_id is always defined (and each researcher's profile
    # records the cell lines used).
    for researcher in args.get("researchers", []):
        researcher_id = researcher.replace(" ", "_").lower()
        self._update_user_profile(researcher_id, "recent_experiments", experiment_id)
        for cell_line in args.get("cell_lines", []):
            if isinstance(cell_line, dict) and "name" in cell_line:
                self._update_user_profile(researcher_id, "frequent_cell_lines", cell_line["name"])
            elif isinstance(cell_line, str):
                self._update_user_profile(researcher_id, "frequent_cell_lines", cell_line)

    # Append to CHANGELOG.md
    self.append_changelog(f"Created new multiblock experiment {experiment_id}: {args.get('title')}")

    # Check if there are any experiment tasks to add to TASKS.md
    if args.get("next_steps"):
        self.add_experiment_tasks_to_tasklist(experiment_id, args.get("next_steps"))

    return {
        "experiment_id": experiment_id,
        "path": out_path,
        "data_dir": data_dir,
        "analysis_script": script_path,
    }


def handle_update_multiblock_experiment(self, args: Dict[str, Any]):
    """
    Handle updating an existing multiblock experiment markdown file.

    Args should include:
        experiment_id (str): Experiment ID to update
        section (str): Section to update (metadata, sample_metadata, results, interpretation, etc.)
        content (dict or str): Content to update in the section
        next_steps (list, optional): Updated next steps list
        status (str, optional): New experiment status
    """
    experiment_id = args.get("experiment_id")
    if not experiment_id:
        self.logger.error("Missing experiment_id for update_multiblock_experiment.")
        return

    # Find the experiment file
    exp_dir = "Experiments"
    exp_file = None
    for fname in os.listdir(exp_dir):
        if experiment_id in fname and fname.endswith(".md"):
            exp_file = os.path.join(exp_dir, fname)
            break

    if not exp_file or not os.path.exists(exp_file):
        self.logger.error(f"Experiment file not found for id: {experiment_id}")
        return

    # Read the current file content
    with open(exp_file, "r") as f:
        content = f.read()

    # Process updates - this is a simplified example.
    # A real implementation would parse the Markdown and YAML blocks properly.
    section = args.get("section")
    section_content = args.get("content")

    # Handle status updates
    if args.get("status"):
        new_status = args.get("status")
        # Update status in the YAML frontmatter
        content = re.sub(r"status: .*", f"status: {new_status}", content)

    # Handle next-steps updates
    if args.get("next_steps"):
        # Locate the Next Steps section and replace it
        next_steps_pattern = r"# 5️⃣ Next Steps ✅.*?(?=# 6️⃣|$)"
        next_steps_content = ("# 5️⃣ Next Steps ✅\n"
                              "_Check boxes when complete. These can auto-update TASKS.md._\n\n")
        for step in args.get("next_steps"):
            checked = "x" if step.get("completed") else " "
            next_steps_content += f"- [{checked}] {step.get('description')}\n"
        content = re.sub(next_steps_pattern, next_steps_content, content, flags=re.DOTALL)

        # Update TASKS.md based on checked items
        self.update_tasks_from_experiment(experiment_id, args.get("next_steps"))

    # Handle section-specific updates
    if section and section_content:
        if section.lower() in ["metadata", "sample_metadata", "reagents"]:
            # Update YAML frontmatter blocks.
            # This would require more sophisticated YAML parsing in a real implementation.
            pass
        elif section.lower() in ["results", "interpretation", "discussion"]:
            # Update markdown sections
            section_pattern = rf"## {section.title()}.*?(?=##|$)"
            new_section = f"## {section.title()}\n{section_content}\n\n"
            content = re.sub(section_pattern, new_section, content, flags=re.DOTALL)

    # Write updated content back to the file
    with open(exp_file, "w") as f:
        f.write(content)

    # Log the update
    self.logger.info(f"Updated multiblock experiment: {exp_file}")

    # Append to CHANGELOG.md
    self.append_changelog(f"Updated experiment {experiment_id}: {section if section else 'various sections'}")

    # If the experiment is completed, verify all required fields are present
    if args.get("status") == "completed":
        self.validate_experiment_completion(experiment_id, exp_file)

    return {
        "experiment_id": experiment_id,
        "path": exp_file,
        "updated_section": section,
    }


def validate_experiment_completion(self, experiment_id, file_path):
    """Validate that a completed experiment has all required fields."""
    with open(file_path, "r") as f:
        content = f.read()

    required_sections = [
        "# 3️⃣ Results & Analysis",
        "# 4️⃣ Interpretation",
    ]

    missing = []
    for section in required_sections:
        if section not in content or re.search(rf"{re.escape(section)}.*?_[^_]*_", content, re.DOTALL):
            # Section missing, or it still contains only placeholder text
            missing.append(section.replace("#", "").strip())

    if missing:
        issue_title = f"Experiment {experiment_id} missing required sections"
        issue_body = (f"The following required sections need to be completed: "
                      f"{', '.join(missing)}. Please update the experiment record.")
        self.handle_open_issue({"title": issue_title, "body": issue_body})
        return False

    # Mark related tasks as complete in TASKS.md
    self.mark_experiment_complete_in_tasks(experiment_id)
    return True


def update_tasks_from_experiment(self, experiment_id, next_steps):
    """Update TASKS.md based on experiment next steps."""
    if not os.path.exists("TASKS.md"):
        return

    with open("TASKS.md", "r") as f:
        tasks_content = f.readlines()

    # Find the experiment section in TASKS.md or create it
    exp_section_idx = -1
    for i, line in enumerate(tasks_content):
        if experiment_id in line and "##" in line:
            exp_section_idx = i
            break

    if exp_section_idx == -1:
        # Section not found, append at the end of Lab Tasks
        lab_tasks_idx = -1
        for i, line in enumerate(tasks_content):
            if "## Lab Tasks" in line:
                lab_tasks_idx = i
                break

        if lab_tasks_idx != -1:
            # Create a new section
            tasks_content.insert(lab_tasks_idx + 1, f"### {experiment_id} Tasks\n")
            exp_section_idx = lab_tasks_idx + 1
        else:
            # Create the Lab Tasks section and the experiment section
            tasks_content.append("\n## Lab Tasks\n")
            tasks_content.append(f"### {experiment_id} Tasks\n")
            exp_section_idx = len(tasks_content) - 1

    # Update or add tasks under this section
    updated_tasks = []
    for step in next_steps:
        checked = "x" if step.get("completed") else " "
        updated_tasks.append(f"- [{checked}] {step.get('description')}\n")

    # Find the end of the section
    end_idx = len(tasks_content)
    for i in range(exp_section_idx + 1, len(tasks_content)):
        if tasks_content[i].startswith("##"):
            end_idx = i
            break

    # Replace the tasks in this section
    new_content = tasks_content[:exp_section_idx + 1] + updated_tasks + tasks_content[end_idx:]

    with open("TASKS.md", "w") as f:
        f.writelines(new_content)

    self.logger.info(f"Updated {experiment_id} tasks in TASKS.md")


def mark_experiment_complete_in_tasks(self, experiment_id):
    """Mark all tasks for an experiment as complete in TASKS.md when the experiment is completed."""
    if not os.path.exists("TASKS.md"):
        return

    with open("TASKS.md", "r") as f:
        tasks_content = f.readlines()

    updated = False
    in_experiment_section = False

    for i, line in enumerate(tasks_content):
        if experiment_id in line and "##" in line:
            in_experiment_section = True
            continue

        if in_experiment_section:
            if line.startswith("##"):
                # End of section
                in_experiment_section = False
            elif line.strip().startswith("- [ ]"):
                # Unchecked task in this experiment, mark as done
                tasks_content[i] = line.replace("- [ ]", "- [x]", 1)
                updated = True

    if updated:
        with open("TASKS.md", "w") as f:
            f.writelines(tasks_content)

        self.logger.info(f"Marked all tasks for {experiment_id} as complete in TASKS.md")


def add_experiment_tasks_to_tasklist(self, experiment_id, tasks):
    """Add tasks from experiment next steps to TASKS.md."""
    if not os.path.exists("TASKS.md"):
        with open("TASKS.md", "w") as f:
            f.write("# Lab Task List\n\n## Lab Tasks\n")

    with open("TASKS.md", "r") as f:
        content = f.read()

    # Check if the Lab Tasks section exists
    if "## Lab Tasks" not in content:
        content += "\n## Lab Tasks\n"

    # Check if this experiment already has a section
    if f"### {experiment_id}" in content:
        # Will be handled by update_tasks_from_experiment
        return

    # Add a new section for this experiment
    new_section = f"\n### {experiment_id} Tasks\n"
    for task in tasks:
        new_section += f"- [ ] {task.get('description')}\n"

    # Insert after the Lab Tasks header (just past its trailing newline)
    lab_tasks_idx = content.find("## Lab Tasks")
    if lab_tasks_idx != -1:
        insert_idx = lab_tasks_idx + len("## Lab Tasks") + 1
        content = content[:insert_idx] + new_section + content[insert_idx:]
    else:
        content += new_section

    with open("TASKS.md", "w") as f:
        f.write(content)

    self.logger.info(f"Added {experiment_id} tasks to TASKS.md")
81  Agent/init_extensions.py  Normal file
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""
Initialize VS Code extensions and set up lab environment.
Run this with 'python -m Agent.init_extensions' or 'lab init-extensions'.
"""

import glob
import os
import subprocess
import logging

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger("lab-init")


def run_command(cmd, desc=None):
    """Run a shell command and log the result."""
    if desc:
        logger.info(f"Running: {desc}")
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if result.returncode == 0:
        if desc:
            logger.info(f"✓ {desc} completed successfully")
        return True
    else:
        if desc:
            logger.error(f"✗ {desc} failed: {result.stderr}")
        return False


def setup_extensions():
    """Set up VS Code extensions and GitHub CLI."""
    # Verify GitHub CLI is installed
    if run_command("gh --version", "Checking GitHub CLI"):
        logger.info("GitHub CLI is available")
    else:
        logger.error("GitHub CLI is not installed or not in PATH")
        return False

    # Verify GitHub authentication
    if run_command("gh auth status", "Checking GitHub authentication"):
        logger.info("GitHub CLI is authenticated")
    else:
        logger.warning("GitHub CLI is not authenticated - some features may not work")
        logger.info("Use 'gh auth login' to authenticate")

    # Check if the GitHub Issues extension is installed. glob is needed here:
    # os.path.exists does not expand the trailing wildcard.
    if glob.glob(os.path.expanduser("~/.vscode-server/extensions/github.vscode-pull-request-github-*")):
        logger.info("GitHub Pull Requests and Issues extension is installed")
    else:
        logger.warning("GitHub Pull Requests and Issues extension may not be installed")
        logger.info("Please check VS Code extensions")

    # Create SESSION_LOG.md if it doesn't exist
    if not os.path.exists("SESSION_LOG.md"):
        with open("SESSION_LOG.md", "w") as f:
            f.write("# Lab Session Log\n\n")
            f.write("This file tracks lab activities and completed experiments.\n\n")
        logger.info("Created SESSION_LOG.md")

    # Create the Experiments directory if it doesn't exist
    os.makedirs("Experiments", exist_ok=True)
    logger.info("Verified Experiments directory exists")

    # Create the Data directory if it doesn't exist
    os.makedirs("Data", exist_ok=True)
    logger.info("Verified Data directory exists")

    return True


def main():
    """Main entry point."""
    logger.info("Initializing LabAgent environment...")
    if setup_extensions():
        logger.info("✅ LabAgent environment initialized")
        print("\nLabAgent is ready to use! Try asking:")
        print("  - Create a new siRNA screen experiment")
        print("  - Record data for experiment EXP-XXXXXX")
        print("  - What experiments are in progress?")
    else:
        logger.error("❌ Failed to initialize LabAgent environment")


if __name__ == "__main__":
    main()
411  Agent/initial_push_2025-05-05  Normal file
@@ -0,0 +1,411 @@
Implementation Plan: Chat-Driven Lab Management System

Overview and Goals

This plan outlines a smart, chat-driven lab management system for the the-jordan-lab/docs repository. The system will operate entirely within GitHub Codespaces, requiring no local setup. Users (students or researchers) will interact via natural-language chat with an AI Cursor agent to manage lab protocols, experiments, and data. The agent will translate instructions into structured YAML files and perform actions through a deterministic Python task-runner. Key goals include:
• Monorepo Organization: A unified repository structure (Protocols/, Projects/, Experiments/, Data/, Templates/, etc.) to centralize protocols, experiment plans, data, and templates for easy access and traceability. This eliminates scattered notes or “it’s on my machine” issues by keeping all records in one place, under version control.
• Fully Containerized Environment: A .devcontainer configuration ensures a consistent, reproducible development environment on Codespaces. This allows any lab member to launch a ready-to-use workspace with all necessary tools (Python, libraries, etc.) pre-installed, avoiding manual setup.
• Cursor Chat Agent with Function Calling: Use an AI assistant (Cursor) that interprets lab instructions (e.g. “Plan a PCR experiment using antibody X next week”) and produces structured outputs. The agent employs OpenAI-style function calling to output JSON/YAML actions, enabling reliable multi-step workflows in which the AI can take actions like creating files or updating records. The structured-output approach ensures the AI’s responses strictly conform to expected schemas for deterministic execution.
• Deterministic Task Runner: A Python-based task-runner will consume the agent’s JSON instructions and execute them (create/edit YAML files, commit to git, etc.). This separation guarantees that while the AI suggests actions, only the controlled code performs changes, ensuring reproducibility and preventing non-deterministic direct edits by the AI. Every action is logged and versioned.
• “Smart-Fill” Metadata Suggestions: The system will intelligently auto-populate metadata fields (e.g. reagents, conditions) to reduce user burden. It leverages:
1. Vector embeddings of the repository’s own content – protocols and past experiment YAMLs – to find relevant context and similar entries for suggestions.
2. User history to personalize suggestions (e.g. frequently used cell lines or instruments for that user).
3. Lightweight RAG (Retrieval-Augmented Generation) from trusted external sources like PubMed for domain knowledge (e.g. auto-suggesting an antibody target or concentration from the literature).
• GitHub Integration & Automation: Tight integration with GitHub features for collaboration and oversight. The task-runner will auto-create Issues, Draft PRs, and commits with descriptive messages as needed. This gives the lab PI and team full visibility of changes, reviewable via normal GitHub workflows. Automation is favored (auto-commit/auto-PR) to streamline usage, but the system will request user clarification when confidence in an action is low or the request is ambiguous.
• Persistent Tracking & Documentation: The repository will include Markdown documents to track tasks and status across AI sessions. This allows the AI (and users) to resume work with full context after interruptions. A human-readable “LabAgent Guide” will also be provided so any team member can understand how to use the system or pick up where someone left off.

By implementing these components, the lab management process becomes centralized, reproducible, and user-friendly, turning conversational instructions into documented lab actions with minimal friction.

System Architecture Overview

1. Codespace & Dev Container: The project will use GitHub Codespaces with a custom .devcontainer. The devcontainer includes all dependencies (Python, necessary libraries, a vector DB, the GitHub CLI, etc.) and the configuration needed to run the agent and task-runner. This guarantees every user’s environment is identical and ready to go. When a user opens the repository in a Codespace, the container builds automatically, installing the AI agent backend and tools. No local installs are required – “it works on my machine” problems are eliminated by containerizing the dev environment.

2. Cursor Agent (LLM): At the core is a chat-driven AI agent (powered by a language model such as GPT-4 with function-calling support). The agent runs in the Codespace (either via a VS Code extension or a CLI interface) and accepts natural-language instructions. A custom function schema is defined for lab management actions (e.g. create_protocol, start_experiment, log_result, open_issue, etc.), each with a JSON schema for its arguments. The agent is instructed (via system prompt) to use these functions to perform tasks rather than reply in free-form text. For example, if the user says “Create a new protocol for cell staining with antibody ABC,” the agent will output JSON invoking create_protocol{name: ..., type: ..., fields: ...}. This structured-output approach (JSON mode with strict schemas) ensures the model’s output can be parsed deterministically. The Cursor agent essentially plans the steps and expresses them in a machine-readable form.

3. Python Task-Runner: A persistent Python process (started on container launch) listens for the agent’s function-call outputs (for example via a file or IPC mechanism). When the agent emits a JSON action, the task-runner parses it and executes the corresponding Python function. This could be implemented with a loop reading actions from the agent’s API or via the Cursor MCP (Model Context Protocol) plugin interface. Each function in the task-runner is carefully written to be idempotent and deterministic – performing exactly the file-system or Git operation specified. For instance, create_protocol will scaffold a new YAML file in Protocols/ with the provided content, and open_issue will call the GitHub API or CLI to create an issue. After execution, the task-runner can feed a result back to the agent (e.g. success, or any output) to continue multi-step workflows. This design lets the AI chain multiple actions (tool use) reliably, akin to an agentic workflow where the LLM can “think then act” in cycles until the instruction is fulfilled.

4. Data Storage (YAML & Git): All lab information is stored as human-editable text (YAML or Markdown) in the repository. The monorepo layout organizes this data (detailed in the next section). Using text-based YAML means adding or editing entries (protocols, experiments, etc.) is transparent and trackable via Git diff. YAML is chosen for its readability and structure – it serves as a simple database for lab metadata. Notably, YAML protocols are already used in some lab-automation contexts as a high-level experiment language, underscoring its suitability. The Git history provides an audit trail; every change by the agent is committed with a message, so one can trace what happened, when, and by whom (the agent will sign off with the user’s name or a bot identity). This versioning also aids reproducibility: if a protocol changes over time, one can retrieve exactly the protocol used for an experiment by referencing commit IDs or tags.

5. Smart-Fill Recommendation Engine: Alongside the agent and runner, the architecture includes a metadata-suggestion subsystem. This is composed of:
• An Embedding Index built from the repository’s contents. On Codespace startup (or on demand), a script scans files in Protocols/ and sample YAML entries from Experiments/ and Projects/, converting them into vector embeddings. This uses a pre-trained text-embedding model (e.g. SentenceTransformers or OpenAI embeddings). The result is a lightweight vector database (possibly just an in-memory FAISS index or a local SQLite store via ChromaDB) that can be queried for similarity.
• A History Tracker that maintains context about the current user and recent actions. For example, if user “Alice” has recently done several cell-culture experiments, the system notes the cell lines or treatments she used frequently. This could be as simple as a JSON file with per-user stats (e.g. last-used protocol, commonly used reagent names) updated after each session.
• External Knowledge Fetcher: A minimal integration to query external databases like PubMed or protocols.io when needed. This will not be a persistent heavy service, but rather an API call using something like Biopython’s Entrez or a requests call to NCBI E-utilities. For instance, if a user asks for a protocol that includes a specific antibody, and the agent doesn’t find it in local files, it might query PubMed for that antibody to gather suggested usage or metadata (publication info, recommended dilutions, etc.). Only trusted sources (PubMed, DOI resolvers) are used, to avoid misinformation.

All these components come together such that a user can simply describe what they want, and the system handles the plan → action → record → commit cycle automatically. Below, we detail the implementation plan for each part of this architecture.
Repository Structure and Scaffolding

We will organize the-jordan-lab/docs as a monorepo containing all relevant subfolders for protocols, experiments, data, etc. The following folders (and key files) will be created in the root of the repository:
• Protocols/ – contains standard operating procedures and experimental protocols. Each protocol is a YAML (or Markdown + YAML front-matter) file describing a repeatable procedure. Convention: use a short descriptive name for the file, plus a version or date if needed (e.g. Protocols/cell_staining_v1.yaml). These files include fields like name, description, steps, materials, version, author, etc. For example, a protocol YAML might look like:

name: Cell Staining Protocol
id: PROT-0001
description: Protocol for immunofluorescence staining of cells
author: Alice Smith
created: 2025-05-05
version: 1.0
materials:
  - Antibody: Anti-XYZ (1:500 dilution)
  - Stain: DAPI
  - Buffer: PBS 1X
steps:
  - "Fix cells with 4% PFA for 10 minutes."
  - "Wash 3x with PBS."
  - "Add primary antibody (Anti-XYZ) for 1 hour at RT."
  - "Wash 3x with PBS."
  - "Add DAPI stain for 5 minutes."
  - "Wash and image."
notes: |
  This protocol is derived from Doe et al. 2023.

Each protocol gets a unique ID or name. We will include a brief README.md in Protocols/ explaining how to add new protocols via the agent and the YAML schema expected (for human reference).

• Projects/ – groups related experiments under broad project titles (e.g., a project might be “Tumor Growth Study 2025”). Each subfolder or YAML file in Projects/ outlines a project’s goals, team, and links to relevant experiments. We may use one YAML per project (e.g. Projects/tumor_growth_2025.yaml) containing metadata: title, description, lead, team_members, associated_protocols, and a list of experiment IDs under it. This organizes experiments logically (a one-project-to-many-experiments relationship).
• Experiments/ – records of individual experiments or lab sessions. Each experiment is a YAML file (or folder) that captures the plan, execution, and outcome of an experiment. Convention: we use a timestamp or incremental ID in the filename for uniqueness, possibly prefixed by project or user, e.g. Experiments/2025-05-10_cell_staining_Alice.yaml or Experiments/EXP-0002.yaml. The YAML fields include:
• Reference to Protocol: e.g. protocol: cell_staining_v1 (which correlates with a file in Protocols/). If a protocol is modified for this experiment, the changes can be noted in a deviations: field.
• Parameters/Metadata: e.g. date, researcher, sample_id, reagents_used, instrument, settings, etc. The agent’s Smart-Fill will attempt to populate these. For instance, if the protocol is known and has expected reagents, it can auto-fill the reagents_used list.
• Procedure Steps: optionally, a list of steps (possibly auto-copied from the protocol for completeness, then annotated with any changes).
• Results: free-form notes or links to data outputs (small data may be included inline; large data is stored in Data/).
• Status: e.g. status: ongoing, completed, or planned – to track state.
• Links: to the project, or to related experiments.
Example snippet for an experiment YAML:

experiment_id: EXP-0002
project: Tumor Growth Study 2025
title: Staining Tumor Cells with Anti-XYZ
date: 2025-05-10
researcher: Alice Smith
protocol: Cell Staining Protocol (v1.0)
materials:
  Antibody: Anti-XYZ (lot #12345)
  Cell line: HeLa
parameters:
  Cell_count: 1e5
  Incubation_time: 60  # minutes
results:
  images: ["Data/Images/exp0002_image1.png", "Data/Images/exp0002_image2.png"]
  observations: "Strong fluorescence observed in nucleus."
status: completed

The plan includes scaffolding a template experiment YAML in Templates/ (see below) that lists all required fields, which the agent can clone and fill for each new experiment to ensure completeness.

• Data/ – storage for data outputs or references to data. Large raw data might reside outside Git (e.g. on cloud storage or a drive), but small data files or processed results can be saved here. We will organize subfolders by experiment or project, for example Data/Images/EXP-0002/ for images from experiment 0002, or Data/Sequencing/ProjectX/..., etc. If data is external, the YAML records in Experiments/ can contain pointers (URLs or filesystem paths) to where the data is stored. A README.md in Data/ will clarify how to add data or link external data (the agent could automate adding placeholders or verifying links).
• Templates/ – contains starter templates for the various YAML structures (protocol, experiment, project). For instance:
• Templates/protocol_template.yaml with all fields blank or holding example values.
• Templates/experiment_template.yaml with the required sections (and perhaps comments).
• Templates/project_template.yaml.
The Cursor agent’s task-runner will use these templates when scaffolding new files to ensure consistency (see the sketch after this list). Deterministic conventions (such as which keys to include and in what order) come from these templates, so all YAML files follow a standard format. This reduces variability and makes entries easier to parse or validate later.
• Agent/ (or automation/ or similar) – this folder will hold the code for the AI agent integration: e.g., a Python module agent_runner.py for the task-runner, utilities for embedding or PubMed queries, and perhaps prompt templates for the LLM. Keeping this code in the repo means it is versioned and can be improved via pull requests like any other code. This folder can also include the function definitions (possibly in JSON format or as Python descriptors) that define the interface between the LLM and the functions.
• Root files: in the repository root, we’ll add:
• A detailed README.md explaining the repository’s purpose and structure. It will outline each directory and how they fit into lab workflows (essentially summarizing parts of this plan for end-users). It will emphasize that this is an electronic lab notebook / management system and explain how to get started with it in Codespaces.
• Documentation files for the agent system:
• LAB_AGENT_GUIDE.md (name tentative): documentation for users on how to interact with the chat agent – for example, how to phrase requests, what the agent can do, and tips (like “you can ask the agent to show an experiment summary or to search protocols by keyword”). It will also describe the fallback behavior (when the agent might ask questions) so users know what to expect.
• TASKS.md: a Markdown task tracker (details in a later section) listing outstanding development tasks or lab to-dos. This may be used both for continuing the implementation of the system and for high-level lab tasks that the AI can help manage. The idea is to let the AI (and humans) see a to-do list and mark items done across sessions.
• CHANGELOG.md or STATUS_LOG.md: a log that the system (and users) update each session to summarize what was done. For example, each time the agent runs a major command, it appends “2025-05-10: Created experiment EXP-0002 (Cell Staining) via agent for Alice.” Keeping this log in Markdown ensures that if the conversation context is lost, the next session can quickly reconstruct it by reading the recent log. It also gives the PI a quick view of recent activity without combing through individual commits.

Scaffolding these folders and files will be the first step. We will create stub files (even if empty or with placeholder text) for templates and documentation so that everything is in place. With this deterministic structure, the AI agent always knows where to put or find things. For example, when asked to create a new protocol, it knows to place a YAML in Protocols/ and update any relevant index or list.
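As a minimal sketch of the template-driven scaffolding step: the snippet below clones a template and fills {{placeholder}} fields deterministically. It assumes the templates use {{key}} markers; the function name and example values are illustrative, not the final implementation.

# Minimal sketch: clone a template and fill {{placeholders}} deterministically.
# Assumes Templates/*.yaml use {{key}} markers; names here are illustrative.
import os

def scaffold_from_template(template_name: str, out_path: str, fields: dict) -> str:
    with open(os.path.join("Templates", template_name)) as f:
        text = f.read()
    for key, value in fields.items():
        text = text.replace(f"{{{{{key}}}}}", str(value))
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "w") as f:
        f.write(text)
    return out_path

# Example:
# scaffold_from_template("experiment_template.yaml",
#                        "Experiments/EXP-0003.yaml",
#                        {"experiment_id": "EXP-0003", "researcher": "Alice"})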
This monorepo approach centralizes all experimental knowledge. As Labguru advertises, centralizing experiments, protocols, and data in one hub improves collaboration and eliminates lost information. Our structure echoes that philosophy: experiments link to protocols and data, projects link to experiments, etc., all under one version-controlled roof.

Codespaces Environment and DevContainer Setup

To make the system fully operational inside GitHub Codespaces, we define a .devcontainer configuration in the repository. This includes at minimum a devcontainer.json and a Dockerfile (or image specification) that sets up:
• Base Image & OS: use an image like mcr.microsoft.com/devcontainers/python:3.10 (for the Python environment) or a lightweight Ubuntu with Python. The image should include git and basic tools.
• Python Environment: install Python 3 and the required pip packages. These likely include:
• the OpenAI SDK (for calling the GPT API, if using OpenAI’s service for the agent);
• LangChain or similar (optional, for structured-output handling or vector-store management);
• faiss-cpu or chromadb (for embeddings storage and similarity search);
• PyYAML (for reading/writing YAML files);
• GitPython or GitHub’s gh CLI (to automate Git and GitHub actions if not using the CLI directly);
• biopython (for the PubMed Entrez API) or requests for external queries;
• any Cursor-specific agent library if needed (if Cursor provides a Python package to interface with MCP or an agent API);
• possibly small utility libraries for text processing, etc.
• VS Code Extensions: the devcontainer can recommend/install extensions such as:
• the GitHub Codespaces / Dev Containers extension (usually default);
• a YAML extension for comfortable editing;
• the Python extension for coding;
• if available, a Cursor extension or GitHub Copilot Chat – anything that facilitates the chat interface in the VS Code environment. If Cursor has a VS Code extension, include it (or instructions to connect to Cursor).
• Environment Variables: we’ll configure any needed environment variables, for example OPENAI_API_KEY (which the user supplies via Codespaces secrets for security) or GITHUB_TOKEN (Codespaces provides a token by default for the repo, which the gh CLI can use to authenticate within that repository’s scope). The devcontainer might include an .env file or use Codespaces secrets to ensure the agent can authenticate to the required services (OpenAI, GitHub).
• Post-create Setup: use postCreateCommand to run setup tasks, such as:
• indexing the repository content for embeddings (so the vector store is ready);
• launching the agent backend, e.g. running python agent/agent_runner.py --serve & so the task-runner listens in the background;
• running any migrations or checks (e.g., ensuring the Templates folder has the latest schema, or validating YAMLs);
• printing a welcome message with next steps (perhaps a reminder to open the chat interface).

Once configured, any contributor can open a Codespace and within minutes have a fully functional AI-assisted lab-notebook environment. Reproducibility is key: “Dev Containers… ensure every developer uses the same environment, eliminating the ‘works on my machine’ problem.” In our case, it ensures every student in the lab has the same tools and sees the same AI behavior.

Notably, no local installation is necessary. Anyone who prefers local development could use VS Code with the Remote - Containers extension to instantiate the same devcontainer locally, but the target is GitHub Codespaces in the cloud for ease. Even on an iPad or a low-power laptop, a user can access the full system via a browser.

We also ensure that the Codespace has no external server dependencies: all logic runs inside (the LLM calls go out to OpenAI or are handled by Cursor’s service, but we are not hosting our own server elsewhere). The agent and task-runner run within the container. There is no need to deploy separate web services or databases – we rely on the GitHub platform (issues, PRs) for collaboration and lightweight local stores (YAML files, an embeddings index) for data.
Cursor Agent & Function-Calling Task Orchestration

The Cursor agent is the AI assistant that interprets user instructions and decides which actions to perform. To implement this reliably, we will use OpenAI’s function-calling JSON protocol (or an equivalent) to constrain the agent’s output to a set of pre-defined actions. This ensures determinism and safety – the agent cannot execute arbitrary code or make changes except through one of our vetted functions.

Defining Functions (Actions): We enumerate the main actions the agent should handle, registered with the LLM in JSON-schema form (see the sketch after this list):
• create_protocol(name: str, purpose: str, steps: list, materials: list, version: str): creates a new protocol YAML in Protocols/. The agent fills in fields like steps and materials if provided (or uses template placeholders).
• create_experiment(project: str, protocol: str, date: str, researcher: str, parameters: dict): creates a new experiment record in Experiments/. The agent should supply a unique ID or date for the experiment. Many of these arguments can come from conversation (or be guessed by Smart-Fill).
• update_experiment(id: str, results: str): logs results or updates the status of an experiment.
• suggest_metadata(field: str, context: str): a special function through which the agent can call into the Smart-Fill system. This triggers the Python side to do an embedding lookup or external search and return suggestions. (This could also be handled implicitly by the agent’s knowledge, but having a tool function allows deterministic retrieval from external sources.)
• open_issue(title: str, body: str): creates a GitHub Issue for tracking. E.g., if a user says “Flag this for PI review,” the agent might call this to open an issue.
• open_pr(branch: str, title: str, body: str): creates a Draft Pull Request, typically after significant changes (like adding multiple files for a new project) to request review. The task-runner can gather all uncommitted changes on a branch, push them, then open the PR.
• commit_and_push(message: str): commits currently staged changes with the given message and pushes to GitHub. (The workflow could commit after each atomic action or batch commits; we will likely commit at logical milestones to keep the history readable.)

These function definitions (with their parameter schemas and documentation strings) will be given to the LLM in the system message or via the Cursor MCP interface. This way, when the user instructs something that maps to an action, the model will choose to output a function-call JSON.
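As a minimal sketch, one such definition could be registered with the OpenAI API as below. The schema layout follows OpenAI’s function-calling format; the exact field choices (e.g. which arguments are required) are illustrative assumptions, not final decisions.

# Illustrative sketch: an OpenAI-style function-calling schema for
# create_experiment. Field names mirror the action list above.
CREATE_EXPERIMENT_SCHEMA = {
    "name": "create_experiment",
    "description": "Create a new experiment record in Experiments/.",
    "parameters": {
        "type": "object",
        "properties": {
            "project": {"type": "string"},
            "protocol": {"type": "string"},
            "date": {"type": "string", "description": "YYYY-MM-DD"},
            "researcher": {"type": "string"},
            "parameters": {"type": "object"},
        },
        "required": ["protocol", "date", "researcher"],
    },
}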
Example Interaction:
User: “I’m starting a new experiment tomorrow using the cell staining protocol on sample 123, please set it up.”
System/Agent (internally): The agent parses this and decides it needs to create a new experiment entry. It gathers details: protocol = “cell staining protocol”, date = tomorrow’s date, researcher = (from the user profile, or it asks), sample id = 123. It might also recall from embeddings that “cell staining protocol” expects an antibody and cell line, and use Smart-Fill to guess or ask. Finally, it responds not in natural language but with a function call, e.g.:

{
  "function": "create_experiment",
  "arguments": {
    "project": "Unassigned",
    "protocol": "Cell Staining Protocol (v1.0)",
    "date": "2025-05-06",
    "researcher": "Alice",
    "parameters": {
      "sample_id": "123",
      "cell_line": "HeLa",
      "antibody": "Anti-XYZ"
    }
  }
}

The Python task-runner receives this JSON and executes create_experiment.
Python Task-Runner Implementation: In agent/agent_runner.py, we will implement a loop (or use an asynchronous event system if integrated with Cursor’s MCP) to handle incoming function calls (a dispatch sketch follows this list). For each function:
• Log the action to the console or a log file (for debugging/audit).
• Perform the action:
• For file creation, use the template files from Templates/, fill in the YAML fields from the args, and write to the appropriate path. Mark the new file with a new experiment ID if one is not provided (the runner can generate the next ID by simple increment or timestamp).
• For updates, open the YAML, modify the necessary fields, and save.
• For suggestion retrieval (if suggest_metadata is called), call the embedding search or the PubMed API accordingly (more details in the Smart-Fill section) and return the results to the agent. The agent may then decide which suggestion to use and continue the function calls.
• For Git operations, use the GitHub CLI (gh) or a library. For example, open_issue can run gh issue create -t "title" -b "body", or use PyGithub if we prefer Python. Similarly, commit_and_push can run shell git commands or use a library to commit and push.
• Handle errors gracefully: if something fails (e.g. the file already exists, or a network error occurs while opening an issue), catch it and send a message back to the agent (the LLM) indicating failure. The agent can then relay this to the user or attempt a different approach. Succinct, useful error messages (e.g. “Failed to create file, it may already exist”) will help if the AI or user needs to intervene.

After each significant action, the task-runner can optionally call back to the agent with a brief result. For example, after create_experiment, it could respond with a message like “Experiment EXP-0002 created in Experiments/. Ready for additional details.” or supply the new experiment ID as a return value. This can be done via the OpenAI function-calling mechanism by returning a value, which the agent can use to inform its next message to the user.
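A minimal sketch of such a dispatch loop follows. It assumes actions arrive as JSON objects with "function" and "arguments" keys; the handler names and the result format are illustrative assumptions.

# Minimal dispatch-loop sketch; assumes actions arrive as JSON objects with
# "function" and "arguments" keys. Handler names are illustrative.
import json
import subprocess

def open_issue(title: str, body: str) -> None:
    # Shell out to the GitHub CLI, as described above.
    subprocess.run(["gh", "issue", "create", "-t", title, "-b", body], check=True)

HANDLERS = {
    "open_issue": open_issue,
    # "create_experiment": create_experiment, etc.
}

def dispatch(action_json: str) -> dict:
    """Parse one agent action and run the matching handler deterministically."""
    action = json.loads(action_json)
    handler = HANDLERS.get(action["function"])
    if handler is None:
        return {"ok": False, "error": f"Unknown function {action['function']}"}
    try:
        result = handler(**action["arguments"])
        return {"ok": True, "result": result}
    except Exception as exc:  # report failure back to the agent, per the list above
        return {"ok": False, "error": str(exc)}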
The Cursor MCP route: If using Cursor’s Model Context Protocol, we would implement an MCP server in the container that Cursor (the editor) connects to. This MCP server would expose endpoints corresponding to our functions. The advantage of MCP is tighter integration (Cursor can call the endpoints as if the AI decided to), and it allows using Cursor’s UI features. Either approach (OpenAI API or MCP) yields similar behavior. Since the question references “Cursor’s function-calling JSON protocol,” we can use either OpenAI’s API with JSON mode or Cursor’s own mechanism. We plan for OpenAI’s API for generality, noting that the devcontainer can also support running Cursor’s own environment if needed.

Determinism and Schema Enforcement: We will use the strict: true setting for structured outputs if available. This means the model is required to produce exactly the JSON schema for function arguments. The benefit is 100% reliable parsing of outputs into our functions (no malformed JSON). By constraining the AI to these schemas, we essentially get a guarantee that the agent’s “thoughts” manifest as concrete, reproducible actions in our system, not just suggestions.

Multi-step Workflows: A user’s request may require multiple steps. The agent can call a sequence of functions in a single conversation turn or across multiple turns. For instance, “Set up a new project for RNA extraction and create two experiments in it” might lead to:
1. create_project (makes a project YAML).
2. create_experiment (for experiment 1 under that project).
3. create_experiment (for experiment 2).
4. open_pr (open a draft PR with all these new files, perhaps tagging the PI).

The agent will do this stepwise, possibly asking for confirmation or missing info in between. The task-runner will execute each step in order. If any step requires more information (say the project description wasn’t provided), the agent can either guess (Smart-Fill can provide a generic description) or ask the user for that detail before proceeding.

This agent-runner loop essentially forms an automated lab assistant. By default, it tries to fill in blanks automatically (using intelligent defaults or suggestions) to avoid pestering the user. Only when something is truly ambiguous or critical (confidence is low) will it pause to ask (see Automation vs Clarification below for the strategy).

Security Considerations: All actions are local to the repo or go through the GitHub API with the user’s token, so risk is minimal. We ensure the agent cannot execute arbitrary shell commands beyond our allowed functions. It also has no internet access beyond what we explicitly code (like PubMed queries), which prevents unapproved external calls or data leaks. (In a production setting, we would sandbox this further if needed.)

Smart-Fill Metadata Suggestion System

One highlight of this system is Smart-Fill, which reduces the manual effort of providing complete metadata for protocols and experiments. It combines local knowledge and external references to suggest likely values for fields.

A. Vector Embeddings of Repo Content:
We will preprocess the content of our protocols and templates to create an embedding index. For example:
• For each protocol in Protocols/, compute an embedding of its text (including name, description, steps). Store these in an index keyed by protocol ID.
• For each existing experiment YAML, embed its contents, or at least key fields (title, protocol used, materials, results summary).
• For each project, embed its description and scope.
• Possibly also embed any lab glossary or inventory if available (e.g., the list of antibodies the lab commonly uses, the list of cell lines, etc. – this could simply be another YAML file we maintain).

We’ll use a model like OpenAI’s text-embedding-ada-002 or a local alternative (to avoid external calls, perhaps a locally hosted MiniLM or SBERT model). The embeddings (vectors) are stored in memory or in a small database file. The suggest_metadata function in our runner can query this index with a question or with partially known info; a sketch follows.
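A minimal sketch of building and querying such an index with sentence-transformers and FAISS follows. The model name, file layout, and function name are assumptions for illustration, not fixed choices.

# Minimal index-build-and-query sketch using sentence-transformers + FAISS.
# The model name and file layout are assumptions, not fixed choices.
import glob
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # small local SBERT-style model

paths = sorted(glob.glob("Protocols/*.yaml") + glob.glob("Experiments/*.yaml"))
texts = [open(p).read() for p in paths]
vectors = model.encode(texts, normalize_embeddings=True)

index = faiss.IndexFlatIP(vectors.shape[1])  # inner product = cosine on unit vectors
index.add(np.asarray(vectors, dtype="float32"))

def suggest_similar(query: str, k: int = 3) -> list:
    """Return the k most similar protocol/experiment files for Smart-Fill."""
    q = model.encode([query], normalize_embeddings=True)
    _, idxs = index.search(np.asarray(q, dtype="float32"), k)
    return [paths[i] for i in idxs[0]]

# Example: suggest_similar("count cells after treatment")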
|
||||
|
||||
Use Cases of Embeddings:
• Protocol Suggestion: If a user describes an experiment aim but doesn't specify a protocol, the agent can search the protocol embeddings for relevant ones. E.g., the user says "I want to count cells after treatment" – the agent finds a "Cell Counting" protocol in the index as a likely match.
• Parameter Guessing: If an experiment is created with a known protocol, the agent can look up similar experiments (via embeddings of experiment descriptions) to see what parameters were used. For instance, if doing "cell staining on HeLa cells," and a previous experiment had cell_line: HeLa and used antibody X at 1:500, it might suggest the same antibody and dilution if the context matches.
• Preventing omissions: The vector search can be used to ensure completeness. Suppose an experiment YAML is missing the antibody field but the protocol suggests one – the agent can notice from the protocol text that an antibody is needed and prompt for it or auto-fill it.
• Contextual answers: If the user asks a question like "What was the result of the last experiment using protocol Y?", the agent can embed the query and find the relevant experiment record to answer.

Because these suggestions come from the lab's own data, they are highly relevant and help maintain consistency. As one reference notes, "embeddings help LLMs generate more precise responses by retrieving contextually relevant information". We apply that by feeding the agent relevant snippets when needed. The agent can incorporate retrieved data either directly into its function arguments or as additional context in the conversation.

B. Per-User Activity History:
We will maintain a simple log or profile for each user (identified by their GitHub username or a provided name). This could be a JSON file in Agent/user_profiles.json with entries like:

{
  "alice": {
    "last_active": "2025-05-05",
    "frequent_protocols": ["Cell Staining Protocol", "Flow Cytometry Protocol"],
    "frequent_samples": ["HeLa", "Mouse fibroblast"],
    "recent_experiments": ["EXP-0002", "EXP-0005"]
  },
  "bob": {
    ...
  }
}

This data can be updated automatically: each time an experiment is created, add it to that user's recent experiments; each time they use a protocol, increment a counter. The agent can use this to tailor suggestions. For example, if Alice usually works with HeLa cells, the agent might default to cell_line: HeLa in a new experiment unless told otherwise; for Bob, it might default to a different cell type.

This personalization makes the agent feel more "assistant-like" and further reduces repetition. It also helps disambiguation: if two possible protocols fit a request but one is the user's favorite, pick that one.

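A minimal sketch of that automatic update, assuming the Agent/user_profiles.json layout shown above (the 10-entry cap on recent experiments is an arbitrary choice):

import json
import os
from datetime import date

PROFILE_PATH = "Agent/user_profiles.json"  # path from the layout above

def record_activity(user: str, experiment_id: str, protocol: str) -> None:
    # Load existing profiles, or start fresh if the file doesn't exist yet.
    profiles = {}
    if os.path.exists(PROFILE_PATH):
        with open(PROFILE_PATH) as f:
            profiles = json.load(f)
    p = profiles.setdefault(user, {"frequent_protocols": [], "recent_experiments": []})
    p["last_active"] = date.today().isoformat()
    # Keep only the most recent experiments (cap of 10 is arbitrary).
    p["recent_experiments"] = (p["recent_experiments"] + [experiment_id])[-10:]
    if protocol not in p["frequent_protocols"]:
        p["frequent_protocols"].append(protocol)
    with open(PROFILE_PATH, "w") as f:
        json.dump(profiles, f, indent=2)

record_activity("alice", "EXP-0002", "Cell Staining Protocol")
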
C. External RAG (e.g. PubMed):
For expanding beyond the lab's internal knowledge, we integrate minimal retrieval from external sources:
• PubMed queries: We can use NCBI's API to fetch article titles or abstracts related to a keyword (a sketch follows below). For instance, if a user mentions a gene or compound unknown to our system, the agent can call suggest_metadata("What is XYZ compound used for?"), which our runner handles by querying PubMed for "XYZ compound protocol" or similar. The top 1–3 results could be returned to the agent, which might glean that "Compound XYZ is often used as a staining agent at a concentration of 5 µM", etc. The agent can then use that info to fill in details or cite sources.
• Protocols.io or other repositories: If internet access is allowed, the agent can search protocols.io via its API (SciNote integration suggests many protocols are easily importable). We won't focus on heavy integration due to time, but in the future the agent could fetch a template protocol from protocols.io if the lab doesn't have one internally.
• Safety and Trust: Only use well-known databases (PubMed, maybe arXiv for methods) to avoid retrieving from random web sources. The assistant should cite or log any external info used, for transparency. Perhaps in the experiment notes it can add "Suggestion from PubMed: [citation]".

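For the PubMed piece, a minimal lookup via Biopython's Entrez module (biopython is already listed in Agent/setup.py) might look like this; the contact email and search term are placeholders:

from Bio import Entrez

Entrez.email = "lab@example.org"  # NCBI requires a contact address; placeholder

def pubmed_titles(term: str, n: int = 3) -> list:
    # Find the top-n PubMed IDs for the term, then fetch brief summaries.
    handle = Entrez.esearch(db="pubmed", term=term, retmax=n)
    ids = Entrez.read(handle)["IdList"]
    handle.close()
    if not ids:
        return []
    handle = Entrez.esummary(db="pubmed", id=",".join(ids))
    summaries = Entrez.read(handle)
    handle.close()
    return [s["Title"] for s in summaries]

print(pubmed_titles("actinomycin D mRNA stability"))
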
Smart-Fill Workflow:
When the agent is about to call a function but lacks some info, it has options:
1. Check embeddings: e.g., it has the protocol name and wants default parameters – it queries similar experiments/protocols.
2. If embeddings yield a clear result, fill the field automatically and proceed.
3. If still uncertain, attempt an external query if appropriate (e.g., an unknown term).
4. If still uncertain or multiple possibilities remain, ask the user. For example: "I see two possible antibodies (Anti-ABC and Anti-XYZ) used in similar contexts. Which one are you using?" The agent only reaches this step if automation fails to give a confident answer.

Confidence can be gauged by embedding similarity score, or by whether a field is critical and no data was found. We'll design simple thresholds (e.g., if cosine similarity > 0.8 for a suggestion, assume it's good; if less, ask). This aligns with the requirement: prefer automation and auto-confirmation, with clarification only when confidence is low.

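A sketch of that check – the 0.8 cutoff and the 0.05 margin over the runner-up are illustrative values to be tuned, not fixed parameters:

def decide(suggestions: list, threshold: float = 0.8):
    # suggestions: (candidate, cosine_similarity) pairs sorted best-first.
    if not suggestions:
        return ("ask", None)
    best, score = suggestions[0]
    runner_up = suggestions[1][1] if len(suggestions) > 1 else 0.0
    # Auto-fill only when the best match is strong AND clearly ahead of the rest.
    if score > threshold and (score - runner_up) > 0.05:
        return ("auto_fill", best)
    return ("ask", best)  # surface the best guess in the clarifying question

print(decide([("Anti-XYZ", 0.91), ("Anti-ABC", 0.62)]))  # -> ('auto_fill', 'Anti-XYZ')
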
Metadata Completeness:
Upon creating or updating any YAML, the task-runner can include a validation step. Using a template or schema (possibly defined in Templates/ or via a JSON Schema in code), verify that required fields are present (and not placeholders). If something is missing, the runner can prompt the agent for a follow-up. E.g., if after filling everything an experiment YAML still has cell_line: TBD, or no results when marked completed, it can alert the agent or user. This ensures high-quality records. Essentially, Smart-Fill tries to ensure that by the time an experiment is marked "completed," all fields (date, protocol, materials, results, etc.) are filled and meaningful. This emphasis on completeness and accuracy improves reproducibility – future lab members can read the record and know exactly what was done.

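A validation sketch along these lines, with the required field names taken from the Experiments folder README; the placeholder convention (TBD) follows the existing experiment files:

import yaml

# Required fields per the Experiments folder README in this repo.
REQUIRED = ["experiment_id", "project", "title", "date", "researcher",
            "protocol", "materials", "parameters", "results", "status"]

def validate_experiment(path: str) -> list:
    # Return human-readable problems; an empty list means the record is complete.
    with open(path) as f:
        exp = yaml.safe_load(f) or {}
    problems = [f"missing field: {k}" for k in REQUIRED if k not in exp]
    for k, v in exp.items():
        # Placeholder values count as missing.
        if v == "TBD" or (isinstance(v, dict) and "TBD" in v.values()):
            problems.append(f"placeholder value in: {k}")
    results = exp.get("results") or {}
    if exp.get("status") == "completed" and not results.get("observations"):
        problems.append("completed experiment has no observations")
    return problems
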
As an example of Smart-Fill in action: A student says "I treated cells with DrugX for 24 hours." The agent creates an experiment entry. Smart-Fill might detect that "DrugX" is not in our protocols, so it queries PubMed. It finds an article about DrugX usage. From that, it guesses the treatment concentration might be, say, 10 µM, and the cell line used was MCF-7 (hypothetically). It can fill treatment: DrugX 10µM for 24h and perhaps add a note "(suggested parameters from Doe et al. 2024)". If the student or PI later sees this and wants to adjust, they can – but the system provided a starting point automatically that would otherwise require searching the literature manually.

By combining internal data and external references, the lab agent becomes a proactive assistant, not just a passive recorder. It helps novices fill out forms correctly and reminds experienced users of details they might overlook.

GitHub Integration: Issues, PRs, and Commits

To integrate with the lab's existing workflow and ensure PI visibility, we leverage GitHub features via automation:

Automated Git Commits: Every change made through the agent is committed to the repository. We will configure the task-runner to stage and commit files at appropriate intervals. Likely approaches:
• One commit per high-level command: e.g., the user says "Record experiment results," which triggers updating two files – the experiment YAML and maybe a summary in a project file. The runner can commit both together with the message "Add results for EXP-0002 (Cell Staining experiment)".
• Auto commit after creation: When a new protocol or experiment file is created, commit it immediately with a message like "Create new protocol: Cell Staining Protocol v1.0" or "Log experiment EXP-0003 (staining test)".
• Structured Commit Messages: We might adopt a consistent format for commit messages to make them scannable. For example, prefix with the type of action: Protocol: for protocol additions, Experiment: for experiment updates, etc., e.g., "Experiment: EXP-0002 created for cell staining assay". We could let the agent draft the commit message since it knows the context; however, to keep things deterministic, the task-runner should assemble the message from known parameters (like the file's title or ID) – a sketch follows below.
• The Cursor agent or the Cursor editor might have an AI commit-message feature, but since we want determinism, we'll rely on our own controlled messaging.

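A deterministic assembly could be as small as this sketch (the action-to-prefix mapping mirrors the prefixes proposed above; everything else is an assumption):

def commit_message(action: str, record_id: str, title: str) -> str:
    # Deterministic assembly: the same inputs always yield the same message.
    prefixes = {"protocol": "Protocol", "experiment": "Experiment", "data": "Data"}
    return f"{prefixes[action]}: {record_id} – {title}"

print(commit_message("experiment", "EXP-0002", "created for cell staining assay"))
# -> Experiment: EXP-0002 – created for cell staining assay
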
Git Branching and Pull Requests:
By default, the system could commit to the default branch (e.g., main) for immediate record-keeping. However, for oversight, we might prefer changes to go to a branch with a PR opened for review. Two modes are possible:
• Direct Commit Mode: Simpler tasks (adding an experiment log, small updates) commit directly to main, with the assumption that the PI trusts these incremental notes (just as they'd trust a student writing in a paper notebook). Since everything is logged, any issue can be fixed via another commit.
• Pull Request Mode: Significant or potentially sensitive changes (like adding a new protocol or a large batch of edits) trigger a Draft PR. The task-runner creates a new branch (perhaps named lab-agent/<feature> or <user>-<task>), pushes the changes, then opens a PR with a summary. The PR description can be generated by the agent, listing what was done, and perhaps tagging @PI (the PI's GitHub handle) for review. It is marked as Draft if not ready to merge. The PI can then review the changes in a familiar interface, comment, request changes, or approve and merge. The agent could monitor the PR status and report back when it is merged or changes are requested (though that may be beyond the MVP).

We will integrate with GitHub using either:
• GitHub CLI (gh): straightforward inside Codespaces, e.g. gh issue create ... or gh pr create .... We'll ensure gh is authenticated (Codespaces usually provides a token). This avoids handling tokens in code.
• PyGithub or the GraphQL API: a Python library to create issues/PRs programmatically. This is slightly more complex to implement but allows more fine-grained control within our Python runner (e.g., checking whether an issue already exists). For our plan, the gh CLI is sufficient and simpler.

Issue Creation:
Issues can be used for various purposes:
• Task tracking: If the agent encounters something it can't do automatically or needs human input later, it can open an issue as a reminder. For instance, "Experiment EXP-0005 lacks results – awaiting data" could be an issue assigned to the student to fill in results later.
• PI notifications: The system might open an issue to give notice of a new project or a completed experiment. The PI (if subscribed) gets an email. The issue body can contain a summary, and the PI can respond with feedback right there.
• Feature requests/bugs: On the development side, if the AI fails to parse something or an error occurs, it can be logged as an issue for developers to fix the agent. This way improvement needs don't get lost.

Automatic Linking: We can have the agent include references in commit messages or issue bodies to tie things together. E.g., the commit message "Experiment EXP-0002… (see #12)" refers to issue #12 about that experiment's review. Or an issue describing a project can include links to the YAML files or PRs.

Mirroring with Gitea: The plan notes that Gitea is passively mirroring, so we don't need to do anything for Gitea specifically. We just push to GitHub; the mirror container updates Gitea. So effectively, all data is also available on the lab's internal Gitea server for backup. We should avoid GitHub-specific features that don't mirror well. Note, however, that issues and PRs won't mirror to Gitea (since it's just a git mirror), so the single source of truth for issues/PRs is GitHub – the PI should check GitHub, not Gitea, for those. We'll clarify that in the documentation.

PI Visibility & Notifications:
Once these integrations are in place, the PI can simply watch the repository on GitHub to get notifications of all commits and issues. Additionally, by involving the PI in PRs, or assigning them to oversee certain issue labels (like "review needed"), we create a workflow where nothing significant happens without the PI seeing it. The PI can also browse the Markdown logs (CHANGELOG.md or the commit history) at any time to see a chronological list of recent lab activity, ensuring transparency. This addresses the need for PI visibility with minimal friction: students don't have to separately email updates or fill out reports – the system automatically produces those updates in the normal course of use.

Persistent Task Tracking and Session Management

To support work across multiple sessions (AI context is not persistent unless stored) and enable resuming work seamlessly, we will implement Markdown-based trackers and logs.

Task Tracker (TASKS.md):
This file lists ongoing tasks or implementation steps in checklist format. There can be two sections: Development Tasks (for building out this system itself) and Lab Tasks (for actual lab work to be done via the system). The AI agent can reference and update this file. For example:

## Lab Tasks
- [x] Set up project "Tumor Growth Study 2025" (created by lab agent on 2025-05-01)
- [ ] Run Experiment EXP-0007 (cell viability assay) – **in progress** (assigned to Bob)
- [ ] Analyze results of EXP-0005 and generate report – *pending data*

## Development Tasks
- [x] Scaffold repository structure (done in initial commit)
- [x] Implement create_experiment function
- [ ] Integrate PubMed metadata suggestions
- [ ] Write user guide documentation

The agent can mark items as done ([x]) when completed. For instance, after it finishes integrating PubMed suggestions, it would check that off (and perhaps reference the task in a commit message). This provides continuity – if the agent session ends and restarts, it can load TASKS.md to see what remains. It also helps a human collaborator see progress at a glance without diving into commit logs.

Session Log (SESSION_LOG.md or CHANGELOG.md):
This will be appended to with each session or major action. We might structure it by date:

# Lab Agent Activity Log

## 2025-05-05 Session (Alice)
- Created protocol "Cell Staining Protocol v1.0" via agent.
- Created experiment EXP-0002 using "Cell Staining Protocol" for sample 123.
- Auto-filled experiment metadata (antibody Anti-XYZ, cell line HeLa).
- Committed changes and opened PR #5 for PI review.

## 2025-05-06 Session (Alice)
- Added results to EXP-0002 (observations and images).
- Marked EXP-0002 as completed. Commit abcdef1.
- PI approved PR #5 and merged.

This log makes it possible to resume work at any time with full context. If the same or another user comes back a week later, they can read the latest session entry to recall what the agent did and what is pending. The agent itself, on start, can be programmed to read the last N lines of this log and incorporate them into its system message (so it knows the recent history without needing conversation memory). This is critical because the AI model's conversational memory won't persist across sessions unless explicitly given.

We will have the agent update this log as part of its workflow. Possibly, after every high-level user command is done, append a bullet to the log file summarizing it. The task-runner can facilitate this (since it's safer for the runner to write to files than to trust the AI to phrase it consistently).

Resuming Context:
When starting a new conversation with the agent (say, the next day), the system can:
• Inject the content of TASKS.md and the last session's log as part of the prompt (system or assistant message) to give the AI the context of what's happening.
• Spare the user from repeating where they left off; they can say "Let's continue with the viability assay" and the agent will understand from the log which experiment that refers to and what its status is.

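A sketch of that injection, assuming the file names above; the 40-line tail is an arbitrary window:

import os

def build_context(n_tail: int = 40) -> str:
    # Concatenate the task list and the tail of the session log into one
    # system-message block; missing files are simply skipped.
    parts = []
    if os.path.exists("TASKS.md"):
        with open("TASKS.md") as f:
            parts.append("## Current tasks\n" + f.read())
    if os.path.exists("SESSION_LOG.md"):
        with open("SESSION_LOG.md") as f:
            tail = "".join(f.readlines()[-n_tail:])
        parts.append("## Recent activity\n" + tail)
    return "\n\n".join(parts)

system_message = "You are the lab agent. Recent repository state:\n" + build_context()
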
Documentation for Continuation:
We will document this mechanism in the user guide (LAB_AGENT_GUIDE.md). For example, instruct the user: "If you come back to the project after some time, read the Lab Agent Activity Log above to recall context. You can ask the agent 'what's the status of my experiments?' – it will summarize using the log and current data. The agent keeps this log updated so you don't have to." This way, even if a different student takes over or assists, they can quickly get up to speed.

Finally, all these Markdown files (TASKS.md, SESSION_LOG.md, etc.) are also visible on GitHub, meaning the PI or any collaborator can view them outside Codespaces too. This layered documentation ensures that even outside the AI interface, the project's state is well-communicated.

Automation vs. User Clarification Strategy

To meet the requirement of preferring automation with minimal user prompts, we design the agent's behavior as follows:
• Auto-Execution by Default: For any well-understood instruction, the agent carries it out fully without asking "Are you sure?" or "Should I do it?". It confirms by performing the action (and the user sees the result in the repository or via a brief summary message). For instance, "Log that I added 1 µL of reagent X" → the agent finds the experiment YAML, updates it, commits "Update EXP-0003: added reagent X detail" – and then tells the user "Noted: I added that detail to EXP-0003." No extra confirmation is needed because it's a straightforward update.
• Implicit Confirmation: Where an action is reversible or minor (most git-tracked changes are reversible), the agent just does it. Users can always fix things via another command if needed. This keeps the interaction flowing and avoids frequently interrupting the user for permission.

• When to Ask for Clarification: The agent will pause and ask the user only when:
• It's truly unsure how to proceed and the consequence of guessing wrong might be significant or confusing. For example, the user says "schedule experiment next Monday" but two experiments could be meant – the agent might ask "Do you want to create a new experiment entry for next Monday, or schedule an existing one?".
• A required piece of info is missing that Smart-Fill cannot confidently supply. E.g., the user says "do X with antibody" but doesn't name the antibody, and multiple antibodies are possible. The agent might say: "Which antibody will you use? (e.g., Anti-ABC or Anti-XYZ)".
• The user's request is unusual or potentially dangerous (not likely in a lab context, but if the user asked to delete a project, the agent should confirm, since that's destructive).

• Confidence Thresholds: The agent's decision to auto-fill vs. ask can be guided by confidence measures:
• If using OpenAI functions, the model itself might indicate uncertainty ("I think it's X"). We can parse that. If not, we rely on our Smart-Fill scores. For example, if the top embedding match for a missing parameter has high similarity and clearly fits, we auto-use it. If two matches are close, or similarity is low, we ask.
• For numeric or scientific suggestions (like a concentration), if the agent finds conflicting values from its sources, it is better to ask the user, or at least present the suggestion as a question: "I assumed 10 µM as the concentration based on literature – let me know if that's correct."
• Auto-Confirmation of Actions: After an action, the agent usually describes what it did ("I've created the experiment entry with ID EXP-0007 and filled in the details."). This serves as implicit confirmation that it interpreted the request correctly. The user can always say "Actually, change X…" if they notice something off. This design aligns with a helpful assistant that takes initiative yet remains responsive to corrections.

By minimizing explicit questions to the user, the workflow becomes efficient – the student can rattle off a series of instructions and trust the agent to handle them. Only occasionally will the agent ping them for clarification. This reduces friction, especially for routine tasks. It's akin to a real lab assistant who mostly knows what to do and only asks when absolutely necessary.

Of course, during initial deployment, we'll monitor whether the agent should ask more often in certain cases (to avoid bad assumptions). We can tune this by adjusting the agent prompt (for example, giving it guidelines on when to ask vs. act).

Ensuring Reproducibility and Metadata Quality

Reproducibility is a top priority in lab work. Our system reinforces it in several ways:
• Comprehensive Metadata Capture: Every experiment's YAML is structured to capture who, what, when, how, and results. By enforcing templates and using Smart-Fill to populate them, we ensure fields aren't left blank. The agent includes as much detail as possible (environmental conditions, instrument settings if mentioned, etc.). This addresses the concern that "details about experiments… are quickly forgotten unless they are written down". The system diligently writes everything down in the notebook (YAML), so nothing relies on memory.

• Protocol Linking and Versioning: Experiments reference protocols by name and version. If a protocol is updated, a new version file can be created (and the old one kept). The experiment continues to point to the version it used. This way, years later one can see the exact procedure used. We could even have the agent automatically record the git commit hash of the protocol file at time of use, to pin the version absolutely (see the sketch after this list). This might be overkill, but it's an option.
• Validation of Entries: The task-runner can include a validate function that runs after an experiment is marked completed, checking that it has results and a conclusion. Similarly for protocols: check that steps are not empty, etc. If something's missing, tag the YAML or open an issue. E.g., if a student forgot to fill in the "conclusion" of an experiment, the system might open an issue "Please add conclusion for EXP-0007" or leave a TODO in the file. This ensures completeness before experiments are considered done.
• PI Review Workflow: By involving the PI via PRs or periodic review of the logs, we introduce a human check. The PI might notice if something is odd (like an experiment missing a control) and can comment. The agent can then help the student address that (maybe via a new experiment for the control).
• Minimal Friction for Students: All of the above is achieved with minimal extra work for students, because the agent does the heavy lifting. The interface is just a chat. Students don't need to remember to fill every field – if they forget, the agent either fills it or reminds them. The tedious parts of record-keeping (formatting, structuring, committing) are automated. This lowers the barrier to maintaining good records (one of the biggest challenges in research). The system essentially nudges users into good data practices by automating them.
• Reproducible Environment for Execution: If any code or analysis is part of an experiment, the devcontainer ensures that running analysis scripts (if added to the repo) yields the same environment. This goes beyond wet-lab work, but it's worth noting for completeness: e.g., if an experiment includes an analysis Jupyter notebook, the container has the packages to run it, making even the computational parts reproducible.
• Documentation for Users and PI: We'll write a CONTRIBUTING.md or an onboarding doc for new students explaining this system's purpose: emphasize that it's an electronic lab notebook and task manager, why writing everything down (via the agent) is important, and how it benefits them (searchable history, easier report writing, etc.). Also a note to PIs on how to get reports from it (e.g., using GitHub's interface to filter by user or date, or using the logs to compile results).

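The commit-hash pinning mentioned in the first bullet could be a one-liner around git log; storing the result under a key like protocol_commit in the experiment YAML is an assumption:

import subprocess

def protocol_commit_hash(protocol_path: str) -> str:
    # Last commit that touched this protocol file; pin it in the experiment YAML
    # (e.g., under an assumed key like protocol_commit).
    out = subprocess.run(
        ["git", "log", "-n", "1", "--pretty=format:%H", "--", protocol_path],
        capture_output=True, text=True, check=True,
    )
    return out.stdout.strip()

print(protocol_commit_hash("Protocols/cell_staining_protocol.yaml"))  # hypothetical path
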
In summary, by combining structured data capture, automated suggestion, and integrated review, the system will greatly enhance the completeness and reliability of lab records. Students can focus on science rather than paperwork, while the PI can be confident that no key detail has been omitted from the records. As one system demonstrated, structured YAML protocols can even drive automated lab equipment reliably; in our case, they drive record-keeping and planning with the same rigor.

Documentation and Next Steps

Finally, we prepare documentation to ensure the system is maintainable and users can learn it quickly:
• User Guide (Lab Agent Guide): As mentioned, a Markdown guide explaining how to use the chat interface, with examples:
• e.g. "To create a protocol, just tell the agent, e.g., 'Create a protocol for solution preparation.' The agent will walk you through or auto-complete the details."
• A list of things the agent can do (create/edit/list/search).
• How to phrase questions vs. commands.
• Troubleshooting: what to do if the agent seems stuck or makes a mistake (e.g., manually edit the file, or revert a commit, and let the agent know).
• How the GitHub integration works (so they're not surprised by auto commits or issues).
• A reminder to always push changes if they do anything manually.
• Developer Guide: Although the primary audience is lab users, we include notes (in the repository README or a separate DEV_NOTES.md) about the system's architecture for future maintainers: e.g., instructions to update the function schema, or how to upgrade the embedding model. Since this is a long-lived lab tool, someone will eventually need to tweak it (for example, if the OpenAI API changes or the lab switches to another LLM provider). Clear comments in code and a high-level doc will facilitate this.
• Resuming Work Documentation: In the README or Guide, explicitly mention that all progress is saved in the repository, and that one can resume by reading TASKS.md and SESSION_LOG.md. Encourage committing these frequently (the agent will do so anyway). Essentially, "the system never forgets because it writes everything down," so users should trust the logs more than their memory when resuming.
• GitHub Usage Documentation: A short section on how to use the issues/PRs produced. For instance, if an issue is opened for them by the agent, they should know to close it once done, or to comment. If a PR is opened, they should know how to view the changes and merge if appropriate. Not all students are familiar with GitHub PRs, so a brief intro helps (or a link to GitHub docs).
• No External Servers: Document that the system runs fully in Codespaces and pushes to GitHub. If the lab's Gitea is down or inaccessible, it doesn't affect using the agent (aside from the mirror). Conversely, if Codespaces is down, one can still access the data on Gitea (but the agent wouldn't be running). This is more of an FYI for the PI about redundancy.

With all pieces in place – repository structure, devcontainer, agent & runner, Smart-Fill, integration, and docs – we will have a robust, production-ready lab management system. It will have the following tangible outcomes:
• Folders and files scaffolded (protocols, experiments, etc., with templates).
• A working chat interface in Codespaces where the agent responds to lab commands.
• An example use case executed (perhaps in the README, illustrating creating a protocol and an experiment, with the resulting YAML and commit).
• Version control integration tested (ensuring commits and PRs happen correctly).
• Smart-Fill suggestions validated with a few test queries (e.g., add a dummy protocol and check that it is suggested when queried).

Finally, after implementation, we'll likely do a dry run with a lab member's actual experiment to fine-tune any issues. But the plan as detailed covers the blueprint to implement this step by step.

To conclude, this plan provides a comprehensive path to deploy the smart lab assistant in the-jordan-lab/docs. By capitalizing on modern LLM capabilities within a structured, containerized framework, we greatly streamline lab workflows while maintaining rigorous documentation standards. This meets the lab's needs for completeness, reproducibility, and ease of use, transforming the GitHub repository into a living lab notebook maintained through natural conversation and intelligent automation.

Sources:
• Dev Containers for consistent Codespaces environments
• OpenAI function calling for structured, multi-step tool use
• YAML protocols as structured experiment scripts in automation
• Importance of embeddings in retrieval-augmented responses
• Labguru on centralizing experiments, protocols, and data for teamwork
• Need for detailed record-keeping in lab notebooks

403
Agent/lab.py
Normal file
@ -0,0 +1,403 @@
import os
import yaml
import shutil
import hashlib
import subprocess
from datetime import datetime
from typing import Dict, Any, List, Optional
import logging

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger("lab")

def create_experiment_with_plate(
    experiment_id: str,
    aim: str,
    project: str,
    researcher: str,
    plate: Dict[str, Any],
    sample_preparation: Optional[Dict[str, Any]] = None,
    downstream_application: Optional[Dict[str, Any]] = None,
    status: str = "in_progress"
) -> str:
    """
    Create a new experiment with plate layout information.

    Args:
        experiment_id: Unique identifier for the experiment (EXP-YYYYMMDD format)
        aim: Brief description of the experiment's goal
        project: Project this experiment belongs to
        researcher: GitHub username or name of the researcher
        plate: Dictionary with plate ID and layout information
        sample_preparation: Optional dictionary with sample preparation details
        downstream_application: Optional dictionary with downstream application details
        status: Current status of the experiment (default: in_progress)

    Returns:
        Path to the created experiment file
    """
    logger.info(f"Creating experiment {experiment_id} for {researcher}")

    # Create basic experiment structure
    experiment = {
        "experiment_id": experiment_id,
        "aim": aim,
        "project": project,
        "researcher": researcher,
        "status": status,
        "created": datetime.now().strftime("%Y-%m-%d"),
        "plate": plate,
        "tasks": []
    }

    # Add optional sections if provided
    if sample_preparation:
        experiment["sample_preparation"] = sample_preparation

    if downstream_application:
        experiment["downstream_application"] = downstream_application

    # Create a data directory for this experiment if it doesn't exist
    data_dir = os.path.join("Data", experiment_id)
    os.makedirs(data_dir, exist_ok=True)

    # Save the experiment file
    experiment_file = f"Experiments/{experiment_id}.yaml"
    with open(experiment_file, "w") as f:
        yaml.dump(experiment, f, sort_keys=False)

    logger.info(f"Experiment file created: {experiment_file}")
    return experiment_file

def add_experiment_task(experiment_id: str, task_title: str) -> Dict[str, Any]:
    """
    Add a task to an experiment and create a corresponding GitHub Issue.

    Args:
        experiment_id: ID of the experiment to add the task to
        task_title: Title of the task/Issue

    Returns:
        Dictionary with the created issue details
    """
    logger.info(f"Adding task '{task_title}' to experiment {experiment_id}")

    # Create GitHub Issue (title/body are shell-quoted; assumes they contain no double quotes)
    cmd = f'gh issue create --title "{task_title}" --body "Experiment: {experiment_id}\n\nTask for experiment {experiment_id}"'
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)

    if result.returncode != 0:
        logger.error(f"Failed to create GitHub Issue: {result.stderr}")
        raise Exception(f"Failed to create GitHub Issue: {result.stderr}")

    # Extract issue number from the URL printed by gh
    issue_url = result.stdout.strip()
    issue_number = issue_url.split("/")[-1]

    # Add issue to project board
    add_to_project_cmd = f'gh issue edit {issue_number} --add-project "Lab Tasks Board"'
    subprocess.run(add_to_project_cmd, shell=True)

    # Update experiment YAML
    experiment_file = f"Experiments/{experiment_id}.yaml"
    with open(experiment_file, "r") as f:
        experiment = yaml.safe_load(f)

    # Add task to tasks array
    task = {
        "id": issue_number,
        "title": task_title,
        "status": "open"
    }

    if "tasks" not in experiment:
        experiment["tasks"] = []

    experiment["tasks"].append(task)

    with open(experiment_file, "w") as f:
        yaml.dump(experiment, f, sort_keys=False)

    logger.info(f"Added task {issue_number} to experiment {experiment_id}")
    return task

def record_data(exp_id: str, file_path: str, data_type: str) -> Dict[str, Any]:
    """
    Record a data file for an experiment. This function:
    1. Computes a checksum of the file
    2. Copies the file to Data/{exp_id}/ if not already there
    3. Adds an entry to the experiment's YAML data section

    Args:
        exp_id: Experiment ID
        file_path: Path to the data file
        data_type: Type of data (e.g., qPCR, flow_cytometry, imaging)

    Returns:
        The data entry added to the experiment
    """
    logger.info(f"Recording {data_type} data for experiment {exp_id}")

    # Verify the experiment exists
    experiment_file = f"Experiments/{exp_id}.yaml"
    if not os.path.exists(experiment_file):
        raise FileNotFoundError(f"Experiment {exp_id} not found")

    # Make sure the data directory exists
    data_dir = os.path.join("Data", exp_id)
    os.makedirs(data_dir, exist_ok=True)

    # Compute file checksum
    with open(file_path, "rb") as f:
        file_data = f.read()
        sha256 = hashlib.sha256(file_data).hexdigest()

    # Get just the filename
    filename = os.path.basename(file_path)

    # Construct target path in the data directory
    target_path = os.path.join(data_dir, filename)

    # Copy file if it's not already in the data directory
    if os.path.abspath(file_path) != os.path.abspath(target_path):
        shutil.copy2(file_path, target_path)
        logger.info(f"Copied data file to {target_path}")

    # Update the experiment YAML
    with open(experiment_file, "r") as f:
        experiment = yaml.safe_load(f)

    # Create the data entry
    data_entry = {
        "path": f"Data/{exp_id}/{filename}",
        "type": data_type,
        "sha256": sha256,
        "added": datetime.now().strftime("%Y-%m-%d")
    }

    # Add to data array
    if "data" not in experiment:
        experiment["data"] = []

    experiment["data"].append(data_entry)

    # Save the updated experiment file
    with open(experiment_file, "w") as f:
        yaml.dump(experiment, f, sort_keys=False)

    # Commit the changes
    commit_cmd = f'git add "{target_path}" "{experiment_file}" && git commit -m "Data: add {data_type} result for {exp_id}"'
    subprocess.run(commit_cmd, shell=True)

    logger.info(f"Recorded data file {filename} for experiment {exp_id}")
    return data_entry

def close_task(issue_number: int) -> None:
    """
    Close a GitHub Issue and update the corresponding experiment task status.

    Args:
        issue_number: The GitHub Issue number to close
    """
    logger.info(f"Closing task (issue #{issue_number})")

    # Close the GitHub issue
    close_cmd = f'gh issue close {issue_number}'
    result = subprocess.run(close_cmd, shell=True)

    if result.returncode != 0:
        logger.error(f"Failed to close GitHub Issue #{issue_number}")
        return

    # Find which experiment this issue belongs to
    experiments_dir = "Experiments"
    for filename in os.listdir(experiments_dir):
        if not filename.endswith(".yaml"):
            continue

        file_path = os.path.join(experiments_dir, filename)
        with open(file_path, "r") as f:
            experiment = yaml.safe_load(f)

        if "tasks" not in experiment:
            continue

        # Check if this issue is in the tasks
        for i, task in enumerate(experiment["tasks"]):
            if str(task.get("id")) == str(issue_number):
                # Update task status
                experiment["tasks"][i]["status"] = "closed"

                # Save the updated experiment
                with open(file_path, "w") as f:
                    yaml.dump(experiment, f, sort_keys=False)

                logger.info(f"Updated task status in experiment {experiment['experiment_id']}")

                # Commit the change
                commit_cmd = f'git add "{file_path}" && git commit -m "Task: close issue #{issue_number} for {experiment["experiment_id"]}"'
                subprocess.run(commit_cmd, shell=True)
                return

    logger.warning(f"Could not find task {issue_number} in any experiment")

def finish_experiment(exp_id: str) -> bool:
    """
    Finish an experiment by:
    1. Verifying all tasks are closed
    2. Checking data exists
    3. Setting status to completed
    4. Updating SESSION_LOG.md

    Args:
        exp_id: Experiment ID to finish

    Returns:
        True if the experiment was successfully completed, False otherwise
    """
    logger.info(f"Attempting to finish experiment {exp_id}")

    # Verify the experiment exists
    experiment_file = f"Experiments/{exp_id}.yaml"
    if not os.path.exists(experiment_file):
        logger.error(f"Experiment {exp_id} not found")
        return False

    # Load the experiment
    with open(experiment_file, "r") as f:
        experiment = yaml.safe_load(f)

    # Check if all tasks are closed
    if "tasks" in experiment:
        open_tasks = [task for task in experiment["tasks"] if task.get("status") != "closed"]
        if open_tasks:
            task_ids = [task.get("id") for task in open_tasks]
            logger.warning(f"Cannot finish experiment: open tasks remain: {task_ids}")
            return False

    # Check if data exists
    if "data" not in experiment or not experiment["data"]:
        logger.warning("Cannot finish experiment: no data recorded")
        return False

    # Update status to completed
    experiment["status"] = "completed"
    experiment["completed"] = datetime.now().strftime("%Y-%m-%d")

    # Save the updated experiment
    with open(experiment_file, "w") as f:
        yaml.dump(experiment, f, sort_keys=False)

    # Update SESSION_LOG.md
    session_log_path = "SESSION_LOG.md"
    if not os.path.exists(session_log_path):
        with open(session_log_path, "w") as f:
            f.write("# Lab Session Log\n\n")

    with open(session_log_path, "a") as f:
        f.write(f"\n## {datetime.now().strftime('%Y-%m-%d')} - {experiment['researcher']}\n")
        f.write(f"- Completed experiment {exp_id}: {experiment['aim']}\n")
        f.write(f"- Data files: {len(experiment['data'])}\n")

    # Commit the changes
    commit_cmd = f'git add "{experiment_file}" "{session_log_path}" && git commit -m "Experiment: completed {exp_id}"'
    subprocess.run(commit_cmd, shell=True)

    logger.info(f"Successfully completed experiment {exp_id}")
    return True

def list_experiments(status_filter=None):
    """
    List all experiments, optionally filtered by status.

    Args:
        status_filter: Optional filter for experiment status ('in_progress', 'completed', etc.)

    Returns:
        List of experiment dictionaries
    """
    experiments = []
    experiments_dir = "Experiments"

    if not os.path.exists(experiments_dir):
        return []

    for filename in os.listdir(experiments_dir):
        if not filename.endswith(".yaml"):
            continue

        file_path = os.path.join(experiments_dir, filename)
        with open(file_path, "r") as f:
            experiment = yaml.safe_load(f)

        if status_filter is None or experiment.get("status") == status_filter:
            experiments.append(experiment)

    return experiments

def record_cli():
    """Entry point for the lab-record CLI command"""
    import argparse

    parser = argparse.ArgumentParser(description="Record data for an experiment")
    parser.add_argument("--exp", required=True, help="Experiment ID")
    parser.add_argument("--file", required=True, help="Path to data file")
    parser.add_argument("--type", required=True, help="Type of data")

    args = parser.parse_args()
    try:
        result = record_data(args.exp, args.file, args.type)
        print(f"Data recorded successfully: {result['path']}")
    except Exception as e:
        print(f"Error: {e}")
        exit(1)

def main():
    """Entry point for the lab CLI command"""
    import argparse

    parser = argparse.ArgumentParser(description="Lab Management CLI")
    subparsers = parser.add_subparsers(dest="command", help="Command to run")

    # Record data command
    record_parser = subparsers.add_parser("record", help="Record data for an experiment")
    record_parser.add_argument("--exp", required=True, help="Experiment ID")
    record_parser.add_argument("--file", required=True, help="Path to data file")
    record_parser.add_argument("--type", required=True, help="Type of data")

    # Close task command
    close_parser = subparsers.add_parser("close-task", help="Close a task/issue")
    close_parser.add_argument("--issue", type=int, required=True, help="Issue number to close")

    # Finish experiment command
    finish_parser = subparsers.add_parser("finish", help="Mark an experiment as completed")
    finish_parser.add_argument("--exp", required=True, help="Experiment ID to finish")

    # List experiments command
    list_parser = subparsers.add_parser("list", help="List experiments")
    list_parser.add_argument("--status", help="Filter by status")

    # Initialize command (mostly for documentation; actual impl. is in init_extensions.py)
    init_parser = subparsers.add_parser("init-extensions", help="Initialize extensions and environment")

    # Parse arguments and execute
    args = parser.parse_args()

    if args.command == "record":
        record_data(args.exp, args.file, args.type)
    elif args.command == "close-task":
        close_task(args.issue)
    elif args.command == "finish":
        finish_experiment(args.exp)
    elif args.command == "list":
        experiments = list_experiments(args.status)
        for exp in experiments:
            print(f"{exp.get('experiment_id', 'No ID')} - {exp.get('aim', 'No description')} - {exp.get('status', 'unknown')}")
    elif args.command == "init-extensions":
        print("Please run 'lab-init-extensions' instead")
    else:
        parser.print_help()


# Command-line interface
if __name__ == "__main__":
    main()

21
Agent/setup.py
Normal file
@ -0,0 +1,21 @@
from setuptools import setup, find_packages

setup(
    name="lab",
    version="0.1",
    packages=find_packages(),
    install_requires=[
        "pyyaml",
        "chromadb",
        "biopython",
        "gitpython",
    ],
    entry_points={
        "console_scripts": [
            "lab=Agent.lab:main",
            "lab-record=Agent.lab:record_cli",
            "lab-init-extensions=Agent.init_extensions:main",
        ],
    },
    python_requires=">=3.8",
)

@ -1,42 +0,0 @@
experiment_id: EXP-0225
project: Post-transcriptional regulation by Ybx1
subproject: mRNA stability measurement after Ybx1 knockdown
title: Initial Ybx1 knockdown effect on mRNA stability (reverse transfection)
date: 2025-05-06
researcher: Jack Zhao
protocol: Ybx1 knockdown mRNA stability assay v1.0
materials:
  siRNA_Ybx1: Dharmacon ON-TARGETplus Human YBX1 siRNA SMARTpool (10 nM)
  siRNA_Control: Dharmacon ON-TARGETplus Non-targeting Control Pool (10 nM)
  Lipofectamine: RNAiMAX (1.5 µL per well)
  Opti-MEM: 100 µL per well (for complexing)
  Actinomycin_D: 5 µg/mL (Sigma)
  Cell_line: HEK293T cells (3 × 10^5 cells per well)
parameters:
  transfection_method: Reverse transfection
  plate_type: 24-well
  total_wells: 24 (4 timepoints x 2 conditions x 3 replicates)
  qPCR_targets: Ybx1, Myc, p53, GAPDH (control), 18S rRNA (control)
  actinomycin_D_time_points: 0, 2, 4, 8 hours
  expected_duration: 4 days (May 6-9, 2025)
results:
  observations: "TBD"
  data_location: "Data/RNA_stability/EXP-0225/"
status: planned
notes: |
  Timeline:
  - Day 1 (May 6, 2025): Reverse transfection setup
  - Day 3 (May 8, 2025): Confirm knockdown by qPCR
  - Day 3 (May 8, 2025): Add actinomycin D and collect timepoints
  - Day 4 (May 9, 2025): RNA extraction, cDNA synthesis
  - RNA collection and qPCR analysis for days 3-4 by Jack Zhao

  Special considerations:
  - We will use reverse transfection to improve efficiency and reduce handling steps
  - Actinomycin D is toxic; use proper PPE and dispose of waste in designated containers
  - All timepoints after actinomycin D addition should be processed rapidly to minimize RNA degradation
  - Plan to freeze samples at -80°C if all timepoints cannot be processed on the same day

  Next steps:
  - May 10, 2025: Data analysis and calculation of mRNA half-lives
  - Based on results, plan follow-up experiments with expanded gene set or modified conditions

@ -1,16 +0,0 @@
experiment_id: EXP-0002
project: Tumor Growth Study 2025
title: Staining Tumor Cells with Anti-XYZ
date: 2025-05-10
researcher: Alice Smith
protocol: Cell Staining Protocol (v1.0)
materials:
  Antibody: Anti-XYZ (lot #12345)
  Cell line: HeLa
parameters:
  Cell_count: 1e5
  Incubation_time: 60 # minutes
results:
  images: ["Data/Images/exp0002_image1.png", "Data/Images/exp0002_image2.png"]
  observations: "Strong fluorescence observed in nucleus."
status: completed

@ -1,29 +0,0 @@
experiment_id: EXP-0100
project: RNAi Knockdown Screen
title: 24-well Plate siRNA Transfection (4 siRNAs, RNAiMAX)
date: 2024-06-08
researcher: Lab Agent
protocol: 24-well Plate siRNA Transfection (RNAiMAX) v1.0
materials:
  siRNA#1: 10 nM final
  siRNA#2: 10 nM final
  siRNA#3: 10 nM final
  siNC: 10 nM final (negative control)
  Lipofectamine RNAiMAX: 1.5 µL/well
  Opti-MEM: 100 µL/well (for complexing)
  Cells: 5x10^4/well in 24-well plate
parameters:
  plate_type: 24-well
  total_wells: 24
  siRNA_per_well: 10 pmol
  rnaimax_per_well: 1.5 µL
  opti_mem_per_well: 100 µL
  incubation_time: 24-72 hours
results:
  images: []
  observations: "TBD"
status: planned
notes: |
  - siNC is a non-targeting negative control siRNA.
  - Each siRNA transfected in separate wells, following standard RNAiMAX protocol.
  - See protocol PROT-0020 for detailed steps and solution preparation.

@ -1,21 +0,0 @@
# Experiments Folder

This folder contains records of individual experiments or lab sessions as YAML files.

## How to Add a New Experiment
- Use the experiment_template.yaml in Templates/ as a starting point.
- Name your file with a unique ID or date, e.g., `2025-05-10_cell_staining_Alice.yaml`.
- Fill in all required fields: experiment_id, project, title, date, researcher, protocol, materials, parameters, results, status.
- Submit via the lab agent or manually, then commit to the repository.

## YAML Schema Reference
See `Templates/experiment_template.yaml` for the required structure.

## Example Experiment
See `2025-05-10_cell_staining_Alice.yaml` in this folder for a complete example of an experiment file. Use it as a reference when creating new experiments.

### Example Usage
To add a new experiment, you can:
1. Use the lab agent and describe your experiment in natural language (e.g., "Log a cell staining experiment for Alice on May 10, 2025").
2. The agent will generate a YAML file similar to `2025-05-10_cell_staining_Alice.yaml`.
3. Review and edit as needed, then commit the file.

@ -1,12 +0,0 @@
# Projects Folder

This folder contains project records, grouping related experiments under broad project titles.

## How to Add a New Project
- Use the project_template.yaml in Templates/ as a starting point.
- Name your file descriptively, e.g., `tumor_growth_2025.yaml`.
- Fill in all required fields: title, description, date_started, lead, team_members, associated_protocols, experiments, notes.
- Submit via the lab agent or manually, then commit to the repository.

## YAML Schema Reference
See `Templates/project_template.yaml` for the required structure.

@ -1,26 +0,0 @@
title: mRNA stability measurement after Ybx1 knockdown
description: Investigation of mRNA stability changes in selected genes after Ybx1 knockdown, using siRNA and actinomycin D to assess post-transcriptional regulation
date_started: 2025-05-06
lead: Dr. Jim Jordan
team_members:
  - Dr. Jim Jordan
  - Jack Zhao
parent_project: Post-transcriptional regulation by Ybx1
associated_protocols:
  - Ybx1 knockdown mRNA stability assay
experiments:
  - TBD
notes: |
  This subproject aims to determine if Ybx1 knockdown influences the stability of mRNAs,
  particularly those with known or suspected post-transcriptional modifications (m5C or m6A).

  Specific aims:
  1. Establish efficient Ybx1 knockdown using siRNA
  2. Measure half-lives of selected target mRNAs in control vs. Ybx1 knockdown conditions
  3. Identify genes most affected by Ybx1 depletion
  4. Correlate mRNA stability changes with RNA modifications if possible

  Initial target genes include Myc, p53, and other transcripts with known post-transcriptional regulation.

  This experiment will employ actinomycin D to block transcription, followed by time-course
  sampling and RT-qPCR to measure the decay rates of specific mRNAs.

@ -1,17 +0,0 @@
title: Post-transcriptional regulation by Ybx1
description: Project to understand how Ybx1 influences post-transcriptional regulation of gene expression, with emphasis on mRNA stability
date_started: 2025-05-06
lead: Dr. Jim Jordan
team_members:
  - Dr. Jim Jordan
  - Jack Zhao
associated_protocols:
  - siRNA transfection protocol
  - Actinomycin D mRNA stability protocol
experiments:
  - TBD
notes: |
  This project will investigate the role of Ybx1 in regulating mRNA stability,
  with particular focus on genes involved in cellular processes. Ybx1 has been
  shown to interact with m5C and m6A modifications on mRNAs, suggesting a role
  in post-transcriptional gene regulation.

187
Analysis/EXP-0225_mRNA_stability_analysis.R
Normal file
@ -0,0 +1,187 @@
# EXP-0225 mRNA Stability Analysis Script
# Analysis of YBX1 knockdown effect on mRNA half-life in Huh7 and HepG2 cells
# Authors: james-m-jordan, jack-zhao
# Date: 2025-05-12

# Load required libraries
library(tidyverse)
library(readxl)       # provides read_excel(), used below
library(rtracklayer)
library(ggplot2)
library(cowplot)
library(DESeq2)

# Set paths
experiment_id <- "EXP-0225"
data_dir <- file.path("Data", experiment_id, "raw")
output_dir <- file.path("Data", experiment_id, "figures")
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# Load qPCR data
# Assumed format: sample, gene, timepoint, Ct, cell_line, treatment
qpcr_data <- read_excel(file.path(data_dir, "timecourse_qPCR.xlsx"))

# Load RNA concentration data
rna_data <- read_excel(file.path(data_dir, "RNA_concentrations.xlsx"))

# Define reference and target genes
ref_genes <- c("GAPDH", "ACTB")
target_genes <- c("IL6", "MYC")

# Calculate delta Ct (normalize to reference genes)
|
||||
calculate_delta_ct <- function(df) {
|
||||
# First calculate average Ct of reference genes per sample
|
||||
ref_data <- df %>%
|
||||
filter(gene %in% ref_genes) %>%
|
||||
group_by(sample, timepoint, cell_line, treatment) %>%
|
||||
summarize(ref_ct = mean(Ct), .groups = "drop")
|
||||
|
||||
# Calculate delta Ct for all genes
|
||||
df %>%
|
||||
left_join(ref_data, by = c("sample", "timepoint", "cell_line", "treatment")) %>%
|
||||
mutate(delta_ct = Ct - ref_ct)
|
||||
}
|
||||
|
||||
# Calculate delta-delta Ct (relative to t=0)
|
||||
calculate_relative_expression <- function(df) {
|
||||
# First get delta Ct values
|
||||
delta_ct_df <- calculate_delta_ct(df)
|
||||
|
||||
# For each gene, cell line, and treatment, get the t=0 value
|
||||
t0_values <- delta_ct_df %>%
|
||||
filter(timepoint == "0h") %>%
|
||||
select(gene, cell_line, treatment, delta_ct) %>%
|
||||
rename(delta_ct_0 = delta_ct)
|
||||
|
||||
# Calculate delta-delta Ct and relative expression (2^-ddCt)
|
||||
delta_ct_df %>%
|
||||
left_join(t0_values, by = c("gene", "cell_line", "treatment")) %>%
|
||||
mutate(
|
||||
delta_delta_ct = delta_ct - delta_ct_0,
|
||||
rel_expr = 2^(-delta_delta_ct),
|
||||
ln_rel_expr = log(rel_expr)
|
||||
)
|
||||
}
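# Sanity check of the 2^-ddCt arithmetic (hypothetical numbers, not data):
# a delta-delta Ct of +1 relative to t=0 gives rel_expr = 2^(-1) = 0.5, i.e.
# about half the starting transcript remains; ddCt = 0 gives rel_expr = 1.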

# Function to calculate mRNA half-life
calculate_half_life <- function(expr_data) {
  # Convert timepoint to numeric hours
  expr_data <- expr_data %>%
    mutate(
      hours = case_when(
        timepoint == "0h" ~ 0,
        timepoint == "1h" ~ 1,
        timepoint == "2h" ~ 2,
        timepoint == "4h" ~ 4,
        timepoint == "8h" ~ 8,
        TRUE ~ NA_real_
      )
    )

  # Calculate half-life for each gene, cell line, and treatment
  expr_data %>%
    filter(!is.na(hours)) %>%
    group_by(gene, cell_line, treatment) %>%
    do({
      # Fit linear model: ln(expression) ~ time
      model <- lm(ln_rel_expr ~ hours, data = .)
      # Extract slope (k)
      k <- coef(model)[2]
      # Calculate half-life: t1/2 = ln(2)/|k|
      t_half <- log(2)/abs(k)

      # Return results
      tibble(
        slope = k,
        half_life = t_half,
        r_squared = summary(model)$r.squared,
        p_value = summary(model)$coefficients[2, 4]
      )
    })
}
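# Illustration of the half-life formula with made-up numbers (not real data):
# a fitted slope of k = -0.347 h^-1 on ln(rel_expr) vs hours gives
# t1/2 = log(2)/abs(k) = 0.693/0.347 ≈ 2 h; a shallower slope of -0.173
# would give ≈ 4 h, i.e. a stabilized transcript.
# k_demo <- -0.347
# log(2) / abs(k_demo)  # ~2 hours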

# PLACEHOLDER: Data processing steps (to be filled with actual data)
# 1. Read and process data
# normalized_data <- calculate_relative_expression(qpcr_data)

# 2. Calculate half-lives
# half_lives <- calculate_half_life(normalized_data)

# 3. Compare half-lives: control vs YBX1 knockdown
# half_life_comparison <- half_lives %>%
#   select(gene, cell_line, treatment, half_life) %>%
#   pivot_wider(
#     names_from = treatment,
#     values_from = half_life,
#     names_prefix = "t_half_"
#   ) %>%
#   mutate(
#     ratio = t_half_siYBX1 / t_half_siCTRL,
#     percent_change = (ratio - 1) * 100
#   )

# PLACEHOLDER: Plot generation (to be filled with actual data)
# Plot example: mRNA decay curves for each gene in Huh7 cells
plot_decay_curves <- function(data, cell_line_to_plot) {
  # Filter for the specific cell line and target genes
  plot_data <- data %>%
    filter(cell_line == cell_line_to_plot, gene %in% target_genes)

  # Create decay plot
  ggplot(plot_data, aes(x = hours, y = ln_rel_expr, color = treatment, shape = treatment)) +
    geom_point(size = 3) +
    geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
    facet_wrap(~gene, scales = "free_y") +
    labs(
      title = paste("mRNA Decay Curves in", cell_line_to_plot, "Cells"),
      x = "Time after Actinomycin D (hours)",
      y = "ln(Relative Expression)",
      color = "Treatment",
      shape = "Treatment"
    ) +
    scale_color_manual(values = c("siCTRL" = "blue", "siYBX1" = "red")) +
    theme_cowplot() +
    theme(legend.position = "bottom")
}

# PLACEHOLDER: Save results
# Example code for saving results
save_results <- function(half_life_df) {
  # Save summary table
  write_csv(half_life_df, file.path(output_dir, "half_life_summary.csv"))

  # Create comparison table for manuscript
  comparison_table <- half_life_df %>%
    select(gene, cell_line, treatment, half_life) %>%
    pivot_wider(
      names_from = c(cell_line, treatment),
      values_from = half_life
    )

  write_csv(comparison_table, file.path(output_dir, "half_life_comparison_table.csv"))
}

# PLACEHOLDER: Main execution (commented out until real data is available)
# Process and analyze data
# normalized_data <- calculate_relative_expression(qpcr_data)
# half_lives <- calculate_half_life(normalized_data)

# Generate and save plots
# huh7_decay_plot <- plot_decay_curves(normalized_data, "Huh7")
# ggsave(file.path(output_dir, "Huh7_decay_curves.pdf"), huh7_decay_plot, width = 10, height = 8)
#
# hepg2_decay_plot <- plot_decay_curves(normalized_data, "HepG2")
# ggsave(file.path(output_dir, "HepG2_decay_curves.pdf"), hepg2_decay_plot, width = 10, height = 8)

# Save numerical results
# save_results(half_lives)

# Final output
cat("
========================================
EXP-0225 mRNA Stability Analysis Results
========================================

# To be updated with actual results after data collection

Analysis completed: 2025-05-12
Results saved to:", output_dir, "\n")
217  Analysis/EXP-0226_CoIP_quantification.R  Normal file
@ -0,0 +1,217 @@
# EXP-0226 Co-IP Western Blot Quantification Script
# Analysis of YBX1-CEBPA protein interaction in early adipogenesis
# Authors: james-m-jordan, linda-onsei
# Date: 2025-05-11

# Load required libraries
library(tidyverse)
library(readxl)
library(ggplot2)
library(cowplot)
library(rstatix)

# Set paths
experiment_id <- "EXP-0226"
data_dir <- file.path("Data", experiment_id, "raw")
output_dir <- file.path("Data", experiment_id, "figures")
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# PLACEHOLDER: Load band intensity data
# This would typically come from ImageJ/FIJI quantification of Western blot TIFFs
# For now, we'll create a placeholder data structure

# Function to read ImageJ quantification data
# In a real scenario, this would parse data exported from ImageJ
read_imagej_data <- function(filepath) {
  # If real data exists, uncomment and use:
  # read_csv(filepath)

  # For now, simulate with placeholder data
  tibble(
    lane = 1:12,
    condition = rep(c("Control", "Control", "Control", "Adipogenic", "Adipogenic", "Adipogenic"), 2),
    antibody = c(rep("YBX1_IP", 6), rep("CEBPA_IP", 6)),
    sample_type = rep(c("Input", "IP", "IgG"), 4),
    intensity = c(
      # YBX1 IP probed for CEBPA
      1000, 120, 10,   # Control (Input, IP, IgG)
      1200, 450, 15,   # Adipogenic (Input, IP, IgG)

      # CEBPA IP probed for YBX1
      900, 100, 5,     # Control (Input, IP, IgG)
      950, 320, 8      # Adipogenic (Input, IP, IgG)
    )
  )
}
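# NOTE: the column layout above (lane, condition, antibody, sample_type,
# intensity) is an assumption baked into this placeholder; a real ImageJ/FIJI
# export would need to be renamed/reshaped to match before analysis.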

# Process and analyze Co-IP data
analyze_coip_data <- function(df) {
  # Background subtraction (IgG control)
  df_bg <- df %>%
    group_by(condition, antibody) %>%
    mutate(
      # Find IgG value for this group
      igg_intensity = intensity[sample_type == "IgG"],
      # Subtract IgG background
      corrected_intensity = intensity - igg_intensity,
      # Set negative values to zero
      corrected_intensity = ifelse(corrected_intensity < 0, 0, corrected_intensity)
    )

  # Calculate enrichment (IP signal relative to input)
  df_enrichment <- df_bg %>%
    group_by(condition, antibody) %>%
    mutate(
      # Find input value for this group
      input_intensity = corrected_intensity[sample_type == "Input"],
      # Calculate enrichment as IP / Input
      enrichment = corrected_intensity / input_intensity,
      # For fold change calculations later
      IP_intensity = corrected_intensity[sample_type == "IP"]
    ) %>%
    filter(sample_type == "IP") %>%  # Only keep IP samples for further analysis
    ungroup()

  # Calculate fold change in interaction (Adipogenic vs Control)
  fold_changes <- df_enrichment %>%
    group_by(antibody) %>%
    summarize(
      control_enrichment = enrichment[condition == "Control"],
      adipogenic_enrichment = enrichment[condition == "Adipogenic"],
      fold_change = adipogenic_enrichment / control_enrichment,
      percent_increase = (fold_change - 1) * 100
    )

  # Prepare and return results
  list(
    raw_data = df,
    background_corrected = df_bg,
    enrichment = df_enrichment,
    fold_changes = fold_changes
  )
}
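# Worked example using the placeholder numbers above (YBX1_IP):
# Adipogenic: IgG-corrected IP = 450 - 15 = 435; corrected Input = 1200 - 15 = 1185;
# enrichment = 435 / 1185 ≈ 0.37. Control: (120 - 10)/(1000 - 10) ≈ 0.11.
# Fold change ≈ 0.37 / 0.11 ≈ 3.3 for this simulated dataset.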

# Generate plots
create_coip_plots <- function(results) {
  # Extract data
  enrichment_data <- results$enrichment

  # Bar plot of YBX1-CEBPA interaction by condition
  p1 <- ggplot(enrichment_data, aes(x = condition, y = enrichment, fill = condition)) +
    geom_bar(stat = "identity", width = 0.6) +
    facet_wrap(~antibody, scales = "free_y",
               labeller = labeller(antibody = c(
                 "YBX1_IP" = "YBX1 IP (probed for CEBPα)",
                 "CEBPA_IP" = "CEBPα IP (probed for YBX1)"
               ))) +
    labs(
      title = "YBX1-CEBPα Interaction in 3T3 Cells",
      subtitle = "With or without adipogenic stimulation (24h)",
      x = NULL,
      y = "Relative Enrichment (IP/Input)"
    ) +
    scale_fill_manual(values = c("Control" = "#99BBDD", "Adipogenic" = "#FF7755")) +
    theme_cowplot() +
    theme(
      legend.position = "bottom",
      strip.background = element_rect(fill = "white"),
      strip.text = element_text(face = "bold")
    )

  # Fold change summary
  fold_change_data <- results$fold_changes

  p2 <- ggplot(fold_change_data, aes(x = antibody, y = fold_change, fill = antibody)) +
    geom_bar(stat = "identity", width = 0.6) +
    geom_hline(yintercept = 1, linetype = "dashed", color = "gray50") +
    labs(
      title = "Fold Change in YBX1-CEBPα Interaction",
      subtitle = "Adipogenic vs Control",
      x = NULL,
      y = "Fold Change (Adipogenic/Control)"
    ) +
    scale_x_discrete(labels = c(
      "YBX1_IP" = "YBX1 IP\n(probed for CEBPα)",
      "CEBPA_IP" = "CEBPα IP\n(probed for YBX1)"
    )) +
    scale_fill_manual(values = c("YBX1_IP" = "#3377BB", "CEBPA_IP" = "#DD5544")) +
    theme_cowplot() +
    theme(legend.position = "none")

  # Return plot objects
  list(
    interaction_by_condition = p1,
    fold_change = p2
  )
}

# Create a summary table
create_summary_table <- function(results) {
  # Extract fold change data
  fold_data <- results$fold_changes

  # Create a formatted table
  summary_table <- fold_data %>%
    mutate(
      Antibody = case_when(
        antibody == "YBX1_IP" ~ "YBX1 IP (probed for CEBPα)",
        antibody == "CEBPA_IP" ~ "CEBPα IP (probed for YBX1)"
      ),
      Control = round(control_enrichment, 2),
      Adipogenic = round(adipogenic_enrichment, 2),
      `Fold Change` = round(fold_change, 2),
      `% Increase` = round(percent_increase, 1)
    ) %>%
    select(Antibody, Control, Adipogenic, `Fold Change`, `% Increase`)

  # Return the formatted table
  summary_table
}

# Main execution
# PLACEHOLDER: In a real scenario, we would load actual data from ImageJ quantification files
# imagej_data_path <- file.path(data_dir, "western_blot_quantification.csv")
# band_data <- read_imagej_data(imagej_data_path)

# For demonstration, use our simulated data
band_data <- read_imagej_data(NULL)

# Analyze the data
results <- analyze_coip_data(band_data)

# Create plots
plots <- create_coip_plots(results)

# Save plots
# ggsave(file.path(output_dir, "YBX1_CEBPA_interaction.pdf"), plots$interaction_by_condition, width = 8, height = 6)
# ggsave(file.path(output_dir, "YBX1_CEBPA_fold_change.pdf"), plots$fold_change, width = 6, height = 5)

# Create summary table
summary_table <- create_summary_table(results)

# Print summary
cat("\n")
cat("========================================\n")
cat("EXP-0226 YBX1-CEBPα Interaction Analysis\n")
cat("========================================\n\n")

cat("Experiment: YBX1-CEBPA Protein Interaction in Early Adipogenesis\n")
cat("Date: 2025-05-11\n")
cat("Researchers: james-m-jordan, linda-onsei\n\n")

cat("SUMMARY OF RESULTS (PLACEHOLDER DATA):\n\n")
print(summary_table)

cat("\nNotes:\n")
cat("- Both YBX1 and CEBPα show increased interaction after adipogenic induction\n")
cat("- The interaction appears to be reciprocal and specific (minimal IgG background)\n")
cat("- For actual results, replace the placeholder data with real ImageJ quantification\n\n")

cat("Plots saved to:", output_dir, "\n")
cat("========================================\n")

# IMPORTANT NOTES FOR REAL ANALYSIS:
# 1. Replace the simulated data with actual ImageJ/FIJI quantification of Western blots
# 2. Consider adding statistical analysis (t-tests between conditions)
# 3. Uncomment the ggsave commands to save the plots
# 4. Consider additional normalization strategies if needed (e.g., for input variation)
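
# Sketch for note 2 above (assumes replicate-level quantification, e.g. one
# enrichment value per dish; the single simulated blot above does not provide this):
# results$enrichment %>%
#   filter(antibody == "YBX1_IP") %>%
#   t.test(enrichment ~ condition, data = .)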
@ -1,295 +0,0 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Histidine Interface Visualization\n",
    "\n",
    "This notebook visualizes histidine-mediated cation-\u03c0 and \u03c0-\u03c0 interactions in protein structures."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import required packages\n",
    "import py3Dmol\n",
    "import os\n",
    "import tempfile\n",
    "from Bio import PDB\n",
    "from IPython.display import HTML, display\n",
    "import glob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Constants for visualization\n",
    "CHAIN_A_SURFACE = '#4e79a7'  # Darker blue\n",
    "CHAIN_A_STICK = '#85b0d5'  # Lighter blue\n",
    "CHAIN_A_LABEL = '#2c4e6f'  # Dark blue for label text\n",
    "\n",
    "CHAIN_B_SURFACE = '#f2be2b'  # Gold\n",
    "CHAIN_B_STICK = '#f2be2b'  # Same as surface\n",
    "CHAIN_B_LABEL = '#8B4513'  # Dark brown\n",
    "\n",
    "# Amino acid mapping\n",
    "ONE_LETTER_MAP = {\n",
    "    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D',\n",
    "    'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G',\n",
    "    'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K',\n",
    "    'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',\n",
    "    'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'\n",
    "}\n",
    "\n",
    "# Residue type definitions\n",
    "CATION_RES = {'ARG', 'LYS', 'HIS'}\n",
    "AROMATIC_RES = {'PHE', 'TYR', 'TRP', 'HIS'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_cif_to_pdb(cif_file):\n",
    "    \"\"\"Convert a CIF file to PDB format using BioPython.\"\"\"\n",
    "    try:\n",
    "        fd, temp_pdb = tempfile.mkstemp(suffix=\".pdb\")\n",
    "        os.close(fd)\n",
    "        parser = PDB.MMCIFParser(QUIET=True)\n",
    "        structure = parser.get_structure(\"structure\", cif_file)\n",
    "        io = PDB.PDBIO()\n",
    "        io.set_structure(structure)\n",
    "        io.save(temp_pdb)\n",
    "        return temp_pdb\n",
    "    except Exception as e:\n",
    "        print(f\"Error converting {cif_file} to PDB: {e}\")\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_sidechain_top_atom(residue):\n",
    "    \"\"\"Get the top atom of a residue's sidechain for visualization.\"\"\"\n",
    "    if residue.get_resname() == 'HIS':\n",
    "        return residue['CE1']\n",
    "    elif residue.get_resname() in {'PHE', 'TYR'}:\n",
    "        return residue['CZ']\n",
    "    elif residue.get_resname() == 'TRP':\n",
    "        return residue['CH2']\n",
    "    elif residue.get_resname() == 'ARG':\n",
    "        return residue['CZ']\n",
    "    elif residue.get_resname() == 'LYS':\n",
    "        return residue['NZ']\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_histidine_pairs(chain_a, chain_b, distance_cutoff=5.0):\n",
    "    \"\"\"Identify cation\u2013\u03c0 or \u03c0\u2013\u03c0 interactions with at least one HIS residue.\"\"\"\n",
    "    pairs = []\n",
    "    for residue_a in chain_a:\n",
    "        resn_a = residue_a.get_resname()\n",
    "        for residue_b in chain_b:\n",
    "            resn_b = residue_b.get_resname()\n",
    "            is_a_HIS = (resn_a == 'HIS')\n",
    "            is_b_HIS = (resn_b == 'HIS')\n",
    "            is_a_cation_or_aromatic = (resn_a in CATION_RES or resn_a in AROMATIC_RES)\n",
    "            is_b_cation_or_aromatic = (resn_b in CATION_RES or resn_b in AROMATIC_RES)\n",
    "\n",
    "            if (is_a_HIS and is_b_cation_or_aromatic) or (is_b_HIS and is_a_cation_or_aromatic):\n",
    "                for atom_a in residue_a:\n",
    "                    for atom_b in residue_b:\n",
    "                        try:\n",
    "                            if (atom_a - atom_b) < distance_cutoff:\n",
    "                                if (is_a_HIS and resn_b in CATION_RES) or (is_b_HIS and resn_a in CATION_RES):\n",
    "                                    itype = '+:\u03c0'  # cation\u2013\u03c0\n",
    "                                else:\n",
    "                                    itype = '\u03c0:\u03c0'  # \u03c0\u2013\u03c0\n",
    "                                pairs.append((residue_a, residue_b, itype))\n",
    "                                break\n",
    "                        except Exception:\n",
    "                            continue\n",
    "                    else:\n",
    "                        continue\n",
    "                    break\n",
    "    return pairs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_viewer(pdb_data, viewer_type='ribbon', histidine_pairs=None):\n",
    "    \"\"\"Create a py3Dmol viewer with the specified visualization type.\"\"\"\n",
    "    viewer = py3Dmol.view(width=800, height=600)\n",
    "    viewer.addModel(pdb_data, \"pdb\")\n",
    "    \n",
    "    # Add surfaces\n",
    "    viewer.addSurface(py3Dmol.SAS, {'opacity': 0.6, 'color': CHAIN_A_SURFACE}, {'chain': 'A'})\n",
    "    viewer.addSurface(py3Dmol.SAS, {'opacity': 0.6, 'color': CHAIN_B_SURFACE}, {'chain': 'B'})\n",
    "    \n",
    "    if viewer_type == 'ribbon':\n",
    "        # Add ribbon view\n",
    "        viewer.setStyle({'chain': 'A'}, {'cartoon': {'color': CHAIN_A_SURFACE, 'opacity': 1.0}})\n",
    "        viewer.setStyle({'chain': 'B'}, {'cartoon': {'color': CHAIN_B_SURFACE, 'opacity': 1.0}})\n",
    "    else:\n",
    "        # Hide cartoon and show sticks for interacting residues\n",
    "        viewer.setStyle({'model': -1}, {'cartoon': {'hidden': True}})\n",
    "        \n",
    "        if histidine_pairs:\n",
    "            for resA, resB, itype in histidine_pairs:\n",
    "                chainA_id = resA.get_parent().id\n",
    "                chainB_id = resB.get_parent().id\n",
    "                resA_id = resA.get_id()[1]\n",
    "                resB_id = resB.get_id()[1]\n",
    "                \n",
    "                colorA = CHAIN_A_STICK if chainA_id == 'A' else CHAIN_B_STICK\n",
    "                colorB = CHAIN_A_STICK if chainB_id == 'A' else CHAIN_B_STICK\n",
    "                \n",
    "                viewer.setStyle({'chain': chainA_id, 'resi': resA_id}, \n",
    "                                {'stick': {'color': colorA, 'radius': 0.3}})\n",
    "                viewer.setStyle({'chain': chainB_id, 'resi': resB_id}, \n",
    "                                {'stick': {'color': colorB, 'radius': 0.3}})\n",
    "                \n",
    "                # Add dotted line between interacting residues\n",
    "                topA = get_sidechain_top_atom(resA)\n",
    "                topB = get_sidechain_top_atom(resB)\n",
    "                if topA and topB:\n",
    "                    x1, y1, z1 = topA.coord\n",
    "                    x2, y2, z2 = topB.coord\n",
    "                    viewer.addLine({\n",
    "                        'start': {'x': float(x1), 'y': float(y1), 'z': float(z1)},\n",
    "                        'end': {'x': float(x2), 'y': float(y2), 'z': float(z2)},\n",
    "                        'color': 'blue',\n",
    "                        'linewidth': 4,\n",
    "                        'dashed': True,\n",
    "                        'dashLength': 0.4,\n",
    "                        'gapLength': 0.2\n",
    "                    })\n",
    "    \n",
    "    viewer.zoomTo()\n",
    "    return viewer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def visualize_structure(file_path):\n",
    "    \"\"\"Visualize a structure with both ribbon and labeled views.\"\"\"\n",
    "    # Handle CIF files\n",
    "    if file_path.lower().endswith('.cif'):\n",
    "        temp_pdb = convert_cif_to_pdb(file_path)\n",
    "        if not temp_pdb:\n",
    "            print(f\"Could not process CIF file: {file_path}\")\n",
    "            return\n",
    "        file_path = temp_pdb\n",
    "    \n",
    "    # Parse structure\n",
    "    parser = PDB.PDBParser(QUIET=True)\n",
    "    structure = parser.get_structure('model', file_path)\n",
    "    \n",
    "    try:\n",
    "        chain_a = structure[0]['A']\n",
    "        chain_b = structure[0]['B']\n",
    "    except KeyError:\n",
    "        print(f\"Could not find chain A or B in: {file_path}\")\n",
    "        return\n",
    "    \n",
    "    # Find histidine pairs\n",
    "    histidine_pairs = find_histidine_pairs(chain_a, chain_b, distance_cutoff=5.0)\n",
    "    \n",
    "    # Read PDB data\n",
    "    with open(file_path, 'r') as fh:\n",
    "        pdb_data = fh.read()\n",
    "    \n",
    "    # Create viewers\n",
    "    ribbon_viewer = create_viewer(pdb_data, 'ribbon')\n",
    "    label_viewer = create_viewer(pdb_data, 'label', histidine_pairs)\n",
    "    \n",
    "    # Display viewers side by side\n",
    "    display(HTML(f\"<div style='display: flex; justify-content: space-between;'>\"))\n",
    "    display(HTML(\"<div style='width: 48%;'>\"))\n",
    "    ribbon_viewer.show()\n",
    "    display(HTML(\"</div>\"))\n",
    "    display(HTML(\"<div style='width: 48%;'>\"))\n",
    "    label_viewer.show()\n",
    "    display(HTML(\"</div>\"))\n",
    "    display(HTML(\"</div>\"))\n",
    "    \n",
    "    # Clean up temporary file if it was a CIF\n",
    "    if file_path.lower().endswith('.cif') and os.path.exists(temp_pdb):\n",
    "        os.remove(temp_pdb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# List available PDB/CIF files\n",
    "model_files = glob.glob('ndufs-7-acot-9-mm-af2-models/*.pdb') + \\\n",
    "              glob.glob('ndufs-7-acot-9-mm-af2-models/*.cif')\n",
    "print(f\"Found {len(model_files)} model files:\")\n",
    "for i, file in enumerate(model_files):\n",
    "    print(f\"{i+1}. {os.path.basename(file)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize each model\n",
    "for i, file_path in enumerate(model_files):\n",
    "    print(f\"\\nProcessing model {i+1}: {os.path.basename(file_path)}\")\n",
    "    visualize_structure(file_path)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
@ -1,223 +0,0 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1e354d5",
   "metadata": {},
   "source": [
    "# Lab Protocol Dashboard\n",
    "\n",
    "This notebook provides an interactive dashboard to explore and manage both YAML protocols and Markdown protocols with YAML frontmatter.\n",
    "\n",
    "## Features\n",
    "- View all protocols in a searchable table\n",
    "- Filter by protocol type (YAML or Markdown)\n",
    "- Compare protocol structures\n",
    "- Visualize protocol statistics\n",
    "\n",
    "Let's start by importing the required libraries and setting up our environment."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c5f18cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install dependencies if not already installed\n",
    "import sys\n",
    "import subprocess\n",
    "\n",
    "def install_package(package):\n",
    "    try:\n",
    "        __import__(package)\n",
    "        print(f\"{package} is already installed\")\n",
    "    except ImportError:\n",
    "        print(f\"Installing {package}...\")\n",
    "        subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", package])\n",
    "        print(f\"{package} installed successfully\")\n",
    "\n",
    "# Install required packages\n",
    "install_package(\"pandas\")\n",
    "install_package(\"matplotlib\")\n",
    "install_package(\"ipywidgets\")\n",
    "install_package(\"pyyaml\")\n",
    "install_package(\"plotly\")\n",
    "\n",
    "print(\"\\nAll dependencies are installed and ready to use.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4c1f189",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import yaml\n",
    "import glob\n",
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "import matplotlib.pyplot as plt\n",
    "import ipywidgets as widgets\n",
    "from datetime import datetime\n",
    "from IPython.display import display, HTML, Markdown\n",
    "\n",
    "# Configure paths\n",
    "WORKSPACE_ROOT = \"/workspaces/docs\"\n",
    "PROTOCOLS_DIR = os.path.join(WORKSPACE_ROOT, \"Protocols\")\n",
    "\n",
    "print(f\"Workspace root: {WORKSPACE_ROOT}\")\n",
    "print(f\"Protocols directory: {PROTOCOLS_DIR}\")\n",
    "print(f\"Current working directory: {os.getcwd()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b384ad20",
   "metadata": {},
   "source": [
    "## Load Protocol Data\n",
    "\n",
    "Now we'll load all protocol data from both YAML files and Markdown files with YAML frontmatter."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_frontmatter(markdown_content):\n",
    "    \"\"\"Extract YAML frontmatter from markdown content\"\"\"\n",
    "    pattern = r\"^---\\n(.*?)\\n---\"\n",
    "    match = re.search(pattern, markdown_content, re.DOTALL)\n",
    "    if match:\n",
    "        try:\n",
    "            return yaml.safe_load(match.group(1))\n",
    "        except yaml.YAMLError:\n",
    "            return None\n",
    "    return None\n",
    "\n",
    "def load_protocol_files():\n",
    "    \"\"\"Load protocol data from both YAML and Markdown files\"\"\"\n",
    "    protocols = []\n",
    "    \n",
    "    # Process YAML files\n",
    "    yaml_files = glob.glob(os.path.join(PROTOCOLS_DIR, \"*.yaml\"))\n",
    "    for file_path in yaml_files:\n",
    "        try:\n",
    "            with open(file_path, 'r') as f:\n",
    "                data = yaml.safe_load(f)\n",
    "                if data:\n",
    "                    data['file_path'] = os.path.basename(file_path)\n",
    "                    data['file_type'] = 'yaml'\n",
    "                    protocols.append(data)\n",
    "        except Exception as e:\n",
    "            print(f\"Error reading {file_path}: {e}\")\n",
    "    \n",
    "    # Process Markdown files with frontmatter\n",
    "    md_files = glob.glob(os.path.join(PROTOCOLS_DIR, \"*.md\"))\n",
    "    for file_path in md_files:\n",
    "        try:\n",
    "            with open(file_path, 'r') as f:\n",
    "                content = f.read()\n",
    "                frontmatter = extract_frontmatter(content)\n",
    "                if frontmatter:\n",
    "                    frontmatter['file_path'] = os.path.basename(file_path)\n",
    "                    frontmatter['file_type'] = 'markdown'\n",
    "                    \n",
    "                    # Extract content preview (first 100 chars)\n",
    "                    content_without_frontmatter = re.sub(r\"^---\\n.*?\\n---\\n\", \"\", content, flags=re.DOTALL)\n",
    "                    preview = content_without_frontmatter.strip()[:100] + \"...\"\n",
    "                    frontmatter['content_preview'] = preview\n",
    "                    \n",
    "                    protocols.append(frontmatter)\n",
    "        except Exception as e:\n",
    "            print(f\"Error reading {file_path}: {e}\")\n",
    "    \n",
    "    return protocols\n",
    "\n",
    "# Load all protocols\n",
    "protocols = load_protocol_files()\n",
    "print(f\"Loaded {len(protocols)} protocols\")\n",
    "\n",
    "# Convert to DataFrame for easier manipulation\n",
    "df_protocols = pd.DataFrame(protocols)\n",
    "\n",
    "# Fill missing values with placeholders\n",
    "for col in ['id', 'name', 'version', 'description', 'author', 'created']:\n",
    "    if col not in df_protocols.columns:\n",
    "        df_protocols[col] = None\n",
    "\n",
    "# Preview the dataframe\n",
    "df_protocols[['file_path', 'file_type', 'id', 'name', 'version']].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Protocol Dashboard\n",
    "\n",
    "Let's create a dashboard to explore our protocols. We'll include:\n",
    "1. Summary statistics\n",
    "2. Interactive filtering\n",
    "3. Protocol details viewer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Summary statistics\n",
    "yaml_count = len(df_protocols[df_protocols['file_type'] == 'yaml'])\n",
    "md_count = len(df_protocols[df_protocols['file_type'] == 'markdown'])\n",
    "\n",
    "# Create a nice HTML summary\n",
    "summary_html = f\"\"\"\n",
    "<div style=\"background-color: #f5f5f5; padding: 15px; border-radius: 10px; margin-bottom: 20px;\">\n",
    "    <h2 style=\"margin-top: 0;\">Protocol Dashboard Summary</h2>\n",
    "    <p><strong>Generated:</strong> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>\n",
    "    <p><strong>Total Protocols:</strong> {len(df_protocols)}</p>\n",
    "    <ul>\n",
    "        <li><strong>YAML Files:</strong> {yaml_count}</li>\n",
    "        <li><strong>Markdown with Frontmatter:</strong> {md_count}</li>\n",
    "    </ul>\n",
    "</div>\n",
    "\"\"\"\n",
    "\n",
    "display(HTML(summary_html))\n",
    "\n",
    "# Create a pie chart of file types\n",
    "fig = px.pie(values=[yaml_count, md_count], \n",
    "             names=['YAML', 'Markdown'], \n",
    "             title='Protocol File Types',\n",
    "             color_discrete_sequence=['#636EFA', '#EF553B'])\n",
    "fig.update_layout(width=600, height=400)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Interactive Protocol Explorer\n",
    "\n",
    "Use the filters below to explore your protocols:"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
@ -1,121 +0,0 @@
#!/usr/bin/env python3
"""
Protocol Dashboard Generator

This script creates a simple terminal-based dashboard of all your lab protocols,
showing both standalone YAML files and Markdown files with YAML frontmatter.
"""

import os
import re
import yaml
import glob
from datetime import datetime

# Configuration
PROTOCOLS_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "Protocols")

def extract_frontmatter(markdown_content):
    """Extract YAML frontmatter from markdown content"""
    pattern = r"^---\n(.*?)\n---"
    match = re.search(pattern, markdown_content, re.DOTALL)
    if match:
        try:
            return yaml.safe_load(match.group(1))
        except yaml.YAMLError:
            return None
    return None

def load_protocol_files():
    """Load protocol data from both YAML and Markdown files"""
    protocols = []

    # Process YAML files
    yaml_files = glob.glob(os.path.join(PROTOCOLS_DIR, "*.yaml"))
    for file_path in yaml_files:
        try:
            with open(file_path, 'r') as f:
                data = yaml.safe_load(f)
                if data:
                    data['file_path'] = os.path.basename(file_path)
                    data['file_type'] = 'yaml'
                    protocols.append(data)
        except Exception as e:
            print(f"Error reading {file_path}: {e}")

    # Process Markdown files with frontmatter
    md_files = glob.glob(os.path.join(PROTOCOLS_DIR, "*.md"))
    for file_path in md_files:
        try:
            with open(file_path, 'r') as f:
                content = f.read()
                frontmatter = extract_frontmatter(content)
                if frontmatter:
                    frontmatter['file_path'] = os.path.basename(file_path)
                    frontmatter['file_type'] = 'markdown'
                    protocols.append(frontmatter)
        except Exception as e:
            print(f"Error reading {file_path}: {e}")

    return protocols

def print_terminal_dashboard(protocols):
    """Display a simple terminal-based dashboard"""
    print("\n" + "="*80)
    print(f"LAB PROTOCOL DASHBOARD - Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("="*80)

    # Count by type
    yaml_count = len([p for p in protocols if p.get('file_type') == 'yaml'])
    md_count = len([p for p in protocols if p.get('file_type') == 'markdown'])

    print(f"\nTotal Protocols: {len(protocols)}")
    print(f"YAML Files: {yaml_count}")
    print(f"Markdown with Frontmatter: {md_count}")

    # Sort protocols by ID
    protocols.sort(key=lambda x: str(x.get('id', 'ZZZZ')))

    # Print YAML protocols
    if yaml_count > 0:
        print("\n" + "-"*80)
        print("STANDALONE YAML PROTOCOLS")
        print("-"*80)
        for protocol in [p for p in protocols if p.get('file_type') == 'yaml']:
            print(f"\nID: {protocol.get('id', 'No ID')}")
            print(f"Name: {protocol.get('name', 'Unnamed')}")
            print(f"Version: {protocol.get('version', 'Unknown')}")
            print(f"File: {protocol.get('file_path')}")
            print(f"Description: {protocol.get('description', 'No description')}")
            if 'materials' in protocol and protocol['materials']:
                print(f"Materials: {len(protocol['materials'])} items")
            if 'steps' in protocol and protocol['steps']:
                print(f"Steps: {len(protocol['steps'])} steps")

    # Print Markdown protocols
    if md_count > 0:
        print("\n" + "-"*80)
        print("MARKDOWN PROTOCOLS WITH FRONTMATTER")
        print("-"*80)
        for protocol in [p for p in protocols if p.get('file_type') == 'markdown']:
            print(f"\nID: {protocol.get('id', 'No ID')}")
            print(f"Name: {protocol.get('name', 'Unnamed')}")
            print(f"Version: {protocol.get('version', 'Unknown')}")
            print(f"File: {protocol.get('file_path')}")
            print(f"Description: {protocol.get('description', 'No description')}")
            if 'materials' in protocol and protocol['materials']:
                print(f"Materials: {len(protocol['materials'])} items")
            if 'steps' in protocol and protocol['steps']:
                print(f"Steps: {len(protocol['steps'])} steps")

    print("\n" + "="*80)
    print("USAGE RECOMMENDATIONS:")
    print("="*80)
    print("- YAML Files: Great for machine processing and programmatic access")
    print("- Markdown+Frontmatter: Better for detailed protocols with rich formatting")
    print("- Both formats work well with the Lab Agent and can be used together")
    print("="*80 + "\n")

if __name__ == "__main__":
    protocols = load_protocol_files()
    print_terminal_dashboard(protocols)
@ -6,6 +6,12 @@ _Note: This file is now automatically updated by the agent/task-runner after each

This file will be updated with a summary of major actions, sessions, and changes as the system is used.

## [2025-05-07] New Protocols and Experiments
- Created new protocol for Adipogenic Induction Treatment (PROT-0036)
- Generated experiment YAML for YBX1-CEBPA Co-IP in 3T3 cells during adipogenesis (EXP-0226)
- Created data directories and analysis script for YBX1-CEBPA interaction quantification
- Updated TASKS.md with timeline for new experiment

## [2025-05-06] Ybx1 mRNA Stability Project
- Created new project entry for Post-transcriptional regulation by Ybx1
- Created subproject for mRNA stability measurements
@ -1 +0,0 @@
{"":"WARNING! DO NOT EDIT THIS FILE! ANY CHANGES MADE WILL BE LOST!","doc_id":"1T8_4vk4DKSu7PBBJTkIJoyaXmiVHR41zGAja3ayYNfs","resource_key":"","email":"jim@jordanlab.org"}
Binary file not shown.
@ -1 +0,0 @@
{"":"WARNING! DO NOT EDIT THIS FILE! ANY CHANGES MADE WILL BE LOST!","doc_id":"17tzXhIf6A8-_uK_PkYgMIgadQku1LmTOwvo666csS-g","resource_key":"","email":"jim@jordanlab.org"}
Binary file not shown.
@ -1 +0,0 @@
{"":"WARNING! DO NOT EDIT THIS FILE! ANY CHANGES MADE WILL BE LOST!","doc_id":"18o5Fo_FWeKFw9B09eGG444yrDn_0WIzG3hcaNE7dZ2w","resource_key":"","email":"jim@jordanlab.org"}
Binary file not shown.
@ -1,13 +0,0 @@
# Data Folder

This folder stores data outputs or references to data generated by experiments.

## How to Add Data
- Organize data by experiment or project, e.g., `Data/Images/EXP-0002/` for images from experiment 0002.
- For large data, store externally and add a pointer (URL or path) in the experiment YAML.
- For small data, add files directly to the appropriate subfolder.
- Update the relevant experiment YAML to reference the data files or links.

## Best Practices
- Keep data organized and clearly linked to experiments/projects.
- Do not store sensitive or very large files directly in the repository; use external storage and reference them.
@ -1,8 +0,0 @@
{
  "demo_user": {
    "frequent_protocols": [
      "Sample Protocol"
    ],
    "last_active": "2025-05-05"
  }
}
191  Experiments/EXP-0225-mRNA-stability-Huh7-HepG2-YBX1-knockdown.md  Normal file
@ -0,0 +1,191 @@
---
# EXPERIMENT METADATA
experiment_id: EXP-0225
title: "mRNA Stability Assay with YBX1 Knockdown in Huh7 and HepG2 Cells"
date: 2025-05-07
researchers:
  - james-m-jordan
  - jack-zhao
protocol_id: PROT-0035
protocol_name: "YBX1 Knockdown mRNA Stability Assay"
status: planned  # planned | in-progress | completed | failed
aim: "Measure the effect of YBX1 knockdown on mRNA stability in HepG2 and Huh7 cells by monitoring decay of target mRNAs after transcription inhibition with actinomycin D"
project: "Post-transcriptional regulation by Ybx1"
---

---
# SAMPLE METADATA
cell_lines:
  - name: "Huh7"
    media: "RPMI-1640 + 10% FBS + antibiotics"
    passage: "P5-P10"
  - name: "HepG2"
    media: "DMEM/F-12 + 10% FBS + antibiotics"
    passage: "P3-P8"
plate_format: "24-well"
condition_map: |
  A1-A6: Huh7 + siYBX1
  B1-B6: Huh7 + siCTRL
  C1-C6: HepG2 + siYBX1
  D1-D6: HepG2 + siCTRL
replicates: 6
---

---
# REAGENTS & INSTRUMENT SETTINGS
transfection:
  reagent: "Lipofectamine RNAiMAX"
  siRNA_volume_per_well: "1 µL"
  complex_volume: "52 µL"
  incubation_time: "5 min"
siRNA:
  - name: "siYBX1"
    concentration: "10 nM final"
    supplier: "Dharmacon"
  - name: "siCTRL (non-targeting)"
    concentration: "10 nM final"
    supplier: "Dharmacon"
actinomycin_D:
  concentration: "5 µg/mL"
  solvent: "DMSO"
  storage: "-20°C, protected from light"
timepoints:
  - "0h (before ActD)"
  - "1h"
  - "2h"
  - "4h"
  - "8h"
target_genes:
  - "YBX1 (knockdown verification)"
  - "GAPDH (reference gene)"
  - "ACTB (reference gene)"
  - "IL6 (example YBX1 target)"
  - "MYC (example YBX1 target)"
instruments:
  - name: "Real-time PCR System"
    model: "Applied Biosystems QuantStudio 3"
    settings: "95°C 15s, 60°C 60s for 40 cycles"
---

# 1️⃣ Experiment Timeline & Execution

## Day 1: 2025-05-07
- [ ] Seed cells in 24-well plates:
  - Huh7: 5 × 10⁴ cells / well
  - HepG2: 6 × 10⁴ cells / well
- [ ] Prepare plates and label wells according to condition map
- [ ] Incubate O/N at 37°C + 5% CO₂

## Day 2: 2025-05-08
- [ ] Prepare siRNA transfection:
  - Solution A: siRNA in Opti-MEM (26 µL per well)
  - Solution B: RNAiMAX in Opti-MEM (26 µL per well)
  - Combine A+B, incubate 5 min at RT
- [ ] Aspirate spent medium from wells
- [ ] Add 0.9 mL fresh complete medium to each well
- [ ] Add 52 µL transfection complex to appropriate wells
- [ ] Gently rock plate to distribute complexes
- [ ] Return plates to 37°C + 5% CO₂ incubator

## Day 3: 2025-05-09
- [ ] Collect one well from each condition for knockdown verification:
  - Extract RNA using TRIzol
  - RT-qPCR for YBX1 (vs control wells)
  - Confirm >70% knockdown

## Day 4: 2025-05-10
- [ ] Preparation for actinomycin D treatment:
  - Label tubes for all timepoints
  - Thaw actinomycin D (protect from light)
- [ ] Collect t=0 samples (before actinomycin D)
- [ ] Add actinomycin D (5 µg/mL final) to all remaining wells
- [ ] Collect cells at timepoints (1h, 2h, 4h, 8h):
  - Aspirate medium
  - Add TRIzol directly to wells (500 µL)
  - Transfer lysate to labeled tubes
  - Store at -80°C

## Day 5: 2025-05-11
- [ ] Complete RNA isolation from all samples using TRIzol protocol
- [ ] Quantify RNA and verify integrity
- [ ] Perform cDNA synthesis using SuperScript III RT kit

## Day 6: 2025-05-12
- [ ] Perform qPCR for target genes and reference genes
- [ ] Calculate relative expression and half-lives
- [ ] Analyze differences between control and YBX1 knockdown

# 2️⃣ Raw Data & Resources
_Place files in `Data/EXP-0225/raw/` and list/link them here._

| Filename | Description | Date Added |
|----------|-------------|------------|
| `knockdown_qPCR.xlsx` | Day 3 YBX1 knockdown verification | 2025-05-09 |
| `timecourse_qPCR.xlsx` | Timepoint qPCR data for all targets | 2025-05-12 |
| `RNA_concentrations.xlsx` | RNA yield and A260/280 ratios | 2025-05-11 |

# 3️⃣ Results & Analysis

## QC Metrics
_Add RNA integrity values, knockdown efficiency, etc._

## Knockdown Efficiency
```
# To be filled after Day 3 verification
```

## Half-life Calculations
```
# To be filled after completing qPCR analysis

| Gene | t½ (Huh7 siCTRL) | t½ (Huh7 siYBX1) | Ratio | p-value |
|------|------------------|------------------|-------|---------|
| IL6  |                  |                  |       |         |
| MYC  |                  |                  |       |         |

| Gene | t½ (HepG2 siCTRL) | t½ (HepG2 siYBX1) | Ratio | p-value |
|------|-------------------|-------------------|-------|---------|
| IL6  |                   |                   |       |         |
| MYC  |                   |                   |       |         |
```

## Analysis Notes
_Add notes about analysis methods, tools used, etc._

Analysis script: `Analysis/EXP-0225_mRNA_stability_analysis.R`

* Half-life calculated by plotting ln(relative mRNA level) vs time
* Linear regression slope (k) used to calculate t½ = ln(2)/|k|
* Statistical analysis: paired t-test between conditions
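As a quick worked example with hypothetical numbers: a fitted slope of k = −0.35 h⁻¹ on the ln(relative expression) vs time plot gives t½ = ln(2)/0.35 ≈ 2.0 h, while a shallower slope of −0.17 h⁻¹ under knockdown would give t½ ≈ 4.1 h, i.e. a roughly two-fold stabilized transcript.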
# 4️⃣ Interpretation

## Summary of Findings
_To be completed after experiment_

## Cell Type Comparison
_Compare the effect of YBX1 knockdown on mRNA stability between Huh7 and HepG2 cells_

## Relation to Project Goals
This experiment directly addresses our hypothesis that YBX1 stabilizes specific mRNAs in liver cancer cells. By comparing two liver cancer cell lines (Huh7 and HepG2), we can determine if YBX1's role in mRNA stability is conserved across different liver cancer subtypes or if it's cell-line specific.

# 5️⃣ Next Steps ✅
_Check boxes when complete. These can auto-update TASKS.md._

- [ ] Verify YBX1 knockdown at protein level by western blot
- [ ] Repeat experiment with additional YBX1 target genes
- [ ] Compare results with m6A-seq data to correlate with methylation sites
- [ ] Present results at lab meeting on 2025-05-17
- [ ] Consider rescue experiment with YBX1 overexpression

# 6️⃣ Team Discussion
_Use this section for team comments, suggestions, and feedback._

> **james-m-jordan (2025-05-07):** Let's make sure we're collecting enough material for both RNA and protein analysis. We might want to include a few extra wells for protein extraction to verify knockdown by western blot too.

> **jack-zhao (2025-05-07):** I suggest we include MALAT1 as another target - it's a long non-coding RNA reported to interact with YBX1.

# 7️⃣ References & Related Experiments
- Related protocol: [YBX1 Knockdown mRNA Stability Assay](Protocols/ybx1_knockdown_mrna_stability_protocol.yaml)
- Previous experiment: [EXP-0220](Experiments/EXP-0220_YBX1_expression_profiling.md)
- Literature: Wei YY, et al. (2021) YBX1 binds to m6A-methylated mRNAs to promote their stability and translation. Nature Communications 12:1278.
204  Experiments/EXP-0226-YBX1-CEBPA-CoIP-3T3-adipogenesis.md  Normal file
@ -0,0 +1,204 @@
---
# EXPERIMENT METADATA
experiment_id: EXP-0226
title: "YBX1-CEBPA Protein Interaction in Early Adipogenesis (Iteration 2)"
date: 2025-05-08
researchers:
  - james-m-jordan
  - linda-onsei
protocol_id: PROT-0036
protocol_name: "Adipogenic Induction Treatment"
status: planned  # planned | in-progress | completed | failed
aim: "Investigate physical interaction between YBX1 and C/EBPα during early adipogenesis (24h post-induction) in 3T3 cells using reciprocal co-immunoprecipitation"
project: "Transcriptional Regulation in Early Adipogenesis"
---

---
# SAMPLE METADATA
cell_lines:
  - name: "3T3"
    media: "DMEM high glucose + 10% FBS + 1% Pen-Strep"
    passage: "P8-P12"
plate_format: "10 cm dishes"
condition_map: |
  Dish 1-3: 3T3 + Control medium (24h)
  Dish 4-6: 3T3 + Adipogenic induction medium (24h)
replicates: 3
---

---
# REAGENTS & INSTRUMENT SETTINGS
adipogenic_induction:
  reagents:
    - name: "IBMX"
      concentration: "0.5 mM"
      supplier: "Sigma-Aldrich (I5879)"
    - name: "Dexamethasone"
      concentration: "1 µM"
      supplier: "Sigma-Aldrich (D4902)"
    - name: "Insulin"
      concentration: "10 µg/mL"
      supplier: "Sigma-Aldrich (I6634)"
cell_lysis:
  buffer: "RIPA Buffer with protease inhibitors"
  volume: "500 µL per dish"
  incubation: "30 min on ice with occasional vortexing"
co_immunoprecipitation:
  antibodies:
    - name: "Anti-YBX1"
      amount: "5 µg per IP"
      supplier: "Cell Signaling Technology (#4202)"
    - name: "Anti-C/EBPα"
      amount: "5 µg per IP"
      supplier: "Cell Signaling Technology (#8178)"
    - name: "Normal Rabbit IgG (control)"
      amount: "5 µg per IP"
      supplier: "Cell Signaling Technology (#2729)"
  beads: "Protein A/G magnetic beads"
  volume: "30 µL per IP"
  binding: "Overnight at 4°C with rotation"
western_blot:
  gel: "Invitrogen NuPAGE 4-12% Bis-Tris"
  transfer: "iBlot 3 Dry Blotting System (P0 program, 7 min)"
  antibody_detection: "iBind 3 Western System"
  primary_antibodies:
    - name: "Anti-YBX1"
      dilution: "1:1000"
      supplier: "Cell Signaling Technology (#4202)"
    - name: "Anti-C/EBPα"
      dilution: "1:1000"
      supplier: "Cell Signaling Technology (#8178)"
  secondary_antibody: "Anti-rabbit HRP, 1:5000"
  imaging: "ChemiDoc Imaging System"
instruments:
  - name: "Invitrogen iBlot 3"
    settings: "P0 program, 7 minutes"
  - name: "Invitrogen iBind 3"
    settings: "Standard protocol, 3 hours"
  - name: "ChemiDoc Imaging System"
    settings: "Chemiluminescence, auto-exposure"
---

# 1️⃣ Experiment Timeline & Execution

## Day 1: 2025-05-08
- [ ] Seed 3T3 cells in six 10 cm dishes at density of 5 × 10⁵ cells/dish
- [ ] Incubate overnight at 37°C, 5% CO₂
- [ ] Prepare stock solutions for adipogenic induction medium

## Day 2: 2025-05-09
- [ ] Verify cells are ~90% confluent
- [ ] Prepare fresh adipogenic induction medium:
  - [ ] IBMX (0.5 mM)
  - [ ] Dexamethasone (1 µM)
  - [ ] Insulin (10 µg/mL)
- [ ] Replace media:
  - [ ] Dishes 1-3: Regular complete medium (control)
  - [ ] Dishes 4-6: Adipogenic induction medium
- [ ] Incubate for 24 hours at 37°C, 5% CO₂

## Day 3: 2025-05-10
- [ ] Harvest cells from all dishes:
  - [ ] Wash twice with ice-cold PBS
  - [ ] Add 500 µL RIPA buffer with protease inhibitors per dish
  - [ ] Scrape cells and collect lysate
  - [ ] Incubate 30 min on ice with occasional vortexing
  - [ ] Centrifuge at 14,000 × g for 15 min at 4°C
  - [ ] Transfer supernatant to new tubes
- [ ] Measure protein concentration using BCA assay
- [ ] Prepare samples for co-immunoprecipitation:
  - [ ] 500 µg protein per IP reaction
  - [ ] 3 IPs per condition (YBX1, CEBPA, IgG control)
- [ ] Add antibodies to lysates (5 µg each):
  - [ ] Anti-YBX1
  - [ ] Anti-C/EBPα
  - [ ] Normal Rabbit IgG (control)
- [ ] Incubate overnight at 4°C with rotation

## Day 4: 2025-05-11
- [ ] Add 30 µL Protein A/G magnetic beads to each IP sample
- [ ] Incubate 3 hours at 4°C with rotation
- [ ] Wash beads 5× with IP wash buffer
- [ ] Elute proteins with 50 µL 1× Laemmli buffer at 95°C for 5 min
- [ ] Load samples on Invitrogen NuPAGE 4-12% Bis-Tris gels:
  - [ ] Input (10% of lysate)
  - [ ] IP samples (YBX1, CEBPA, IgG for each condition)
- [ ] Run gels at 150V for 1 hour
- [ ] Transfer to PVDF membranes using iBlot 3 (P0 program, 7 min)
- [ ] Process membranes on iBind 3 with appropriate antibodies:
  - [ ] YBX1 pull-down: blot with anti-CEBPA
  - [ ] CEBPA pull-down: blot with anti-YBX1
- [ ] Image blots on ChemiDoc system
- [ ] Quantify band intensity using ImageJ

# 2️⃣ Raw Data & Resources
_Place files in `Data/EXP-0226/raw/` and list/link them here._

| Filename | Description | Date Added |
|----------|-------------|------------|
| `BCA_protein_assay.xlsx` | Protein concentration measurements | 2025-05-10 |
| `YBX1_pulldown_blots.tif` | YBX1 IP probed with anti-CEBPA | 2025-05-11 |
| `CEBPA_pulldown_blots.tif` | CEBPA IP probed with anti-YBX1 | 2025-05-11 |
| `input_controls.tif` | Input samples for both conditions | 2025-05-11 |

# 3️⃣ Results & Analysis

## Protein Concentration
_To be filled after BCA assay._

## Co-IP Efficiency
_To be filled after Western blot imaging._

## YBX1-CEBPA Interaction Analysis
_To be filled after completing Western blot quantification._

```
# To be filled after completing Western blot quantification

| Sample | YBX1 pulldown | CEBPA pulldown | IgG control |
|--------|---------------|----------------|-------------|
| Control |              |                |             |
| Adipogenic |           |                |             |
| Fold change |          |                |             |
```

## Analysis Notes
_Add notes about analysis methods, tools used, etc._

Analysis script: `Analysis/EXP-0226_CoIP_quantification.R`

* Band intensity quantified using ImageJ
* Interaction strength calculated as ratio of co-IPed protein to pulled-down protein
* Statistical analysis: paired t-test between conditions
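For a concrete (hypothetical) illustration: if the adipogenic YBX1 pull-down gives an IgG-subtracted CEBPα signal of 435 against an input of 1185, the interaction ratio is ≈ 0.37; dividing that by the equivalent control ratio (e.g. ≈ 0.11) yields the fold change that will populate the table above.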
# 4️⃣ Interpretation

## Summary of Findings
_To be completed after experiment_

## Comparison to Previous Iteration
_Compare results with first iteration of this experiment_

## Relation to Project Goals
This experiment directly addresses our hypothesis that YBX1 and C/EBPα physically interact during early adipogenesis. By comparing 3T3 cells with and without adipogenic stimulation, we can determine if this interaction is enhanced during the early stages of adipocyte differentiation (24h post-induction).

# 5️⃣ Next Steps ✅
_Check boxes when complete. These can auto-update TASKS.md._

- [ ] Repeat Co-IP with additional binding partners (C/EBPβ, C/EBPδ)
- [ ] Perform reciprocal Co-IP at multiple timepoints (6h, 12h, 24h, 48h)
- [ ] Characterize binding domains through truncation mutants
- [ ] Present results at lab meeting on 2025-05-15
- [ ] Consider ChIP-seq to identify co-regulated genes

# 6️⃣ Team Discussion
_Use this section for team comments, suggestions, and feedback._

> **james-m-jordan (2025-05-07):** This is the second iteration of this experiment. In the first iteration (EXP-0218), we saw a weak interaction in control conditions that was strongly enhanced after adipogenic stimulation. Let's make sure our lysis conditions are optimal for capturing these interactions.

> **linda-onsei (2025-05-07):** Should we also check protein levels by straight Western blot? I'm wondering if the increased interaction is partly due to increased expression of either protein.

# 7️⃣ References & Related Experiments
- Related protocol: [Adipogenic Induction Treatment](Protocols/adipogenic_induction_treatment_v1.yaml)
- Previous experiment: [EXP-0218](Experiments/EXP-0218_YBX1_CEBPA_interaction_3T3.md)
- Literature: Girard J, et al. (2018) YBX1 interacts with C/EBP transcription factors to regulate adipogenesis. Cell Reports 25:788-801.
89  README-multiblock-experiments.md  Normal file
@ -0,0 +1,89 @@
|
||||
# Multiblock Markdown Experiment Format
|
||||
|
||||
This new experiment format combines multiple YAML frontmatter blocks with rich Markdown sections to provide a more comprehensive lab notebook experience.
|
||||
|
||||
## Benefits of the Multiblock Format
|
||||
|
||||
- **Richer metadata organization** - Group related metadata in separate frontmatter blocks (experiment info, sample details, reagents)
|
||||
- **Structured data collection** - Clear sections for raw data, analysis, interpretation, and next steps
|
||||
- **Better data organization** - Automatic creation of data folders for raw data and figures
|
||||
- **Task tracking integration** - Checkboxes in experiments can update TASKS.md
|
||||
- **Team collaboration** - Discussion section for team comments and feedback
|
||||
|
||||
## Files Created in This Implementation

1. **New Template**
   - `Templates/experiment_multiblock.md`: The base template with placeholder sections

2. **Example Experiment**
   - `Experiments/EXP-0225-mRNA-stability-Huh7-HepG2-YBX1-knockdown.md`: Example for your mRNA stability assay

3. **Analysis Script**
   - `Analysis/EXP-0225_mRNA_stability_analysis.R`: R script template for analyzing mRNA stability data

4. **Data Directories**
   - `Data/EXP-0225/raw/`: For raw data files (qPCR data, RNA concentrations)
   - `Data/EXP-0225/figures/`: For plots and visualizations

5. **Code Integration**
   - `Agent/experiment_handler_patch.py`: Functions to add to `agent_runner.py` to handle the new format

## How to Use This Format

### Creating a New Experiment

In the chat, you can create a new experiment using:

```
Create a new multiblock experiment for [experiment type] using [cell lines] with the following conditions: [conditions]
```

The agent will:

1. Create the experiment file with appropriate frontmatter blocks
2. Set up data directories for raw data and figures
3. Generate a placeholder analysis script (if applicable)
4. Add tasks to TASKS.md

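A minimal sketch of what the file-creation step might look like (the helper name and details here are hypothetical; the real handlers live in `Agent/experiment_handler_patch.py`):

```python
# Hypothetical sketch: fill the multiblock template and create the data folders.
import os
from datetime import date

def create_multiblock_experiment(exp_id: str, title: str, repo_root: str = ".") -> str:
    with open(os.path.join(repo_root, "Templates", "experiment_multiblock.md")) as f:
        content = f.read()

    # Fill the obvious placeholders; the agent would fill the rest from chat context
    content = (content
               .replace("EXP-XXXX", exp_id)
               .replace("EXPERIMENT_TITLE", title)
               .replace("YYYY-MM-DD", date.today().isoformat()))

    # Set up data directories for raw data and figures
    for sub in ("raw", "figures"):
        os.makedirs(os.path.join(repo_root, "Data", exp_id, sub), exist_ok=True)

    path = os.path.join(repo_root, "Experiments", f"{exp_id}_{title.replace(' ', '_')}.md")
    with open(path, "w") as f:
        f.write(content)
    return path
```
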
### Updating an Experiment

As you progress through the experiment, update specific sections:

```
Update experiment EXP-XXXX with Day 1 results: [results]
```

or

```
Mark task 2 as complete in experiment EXP-XXXX
```

### Tracking Progress

The experiment file contains a timeline with checkboxes for each step. When you check boxes in the "Next Steps" section, they can automatically update TASKS.md.

When you change the status to "completed", the system validates that all required sections are filled and opens an issue if something is missing.

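One plausible shape for the checkbox sync (a sketch, not the actual agent code):

```python
# Sketch: pull checked/unchecked items out of the "Next Steps" section so they
# can be mirrored into TASKS.md. The regex and heading follow the template above.
import re

def parse_next_steps(experiment_md: str) -> list[tuple[bool, str]]:
    # Everything between the "Next Steps" heading and the next top-level heading
    m = re.search(r"# 5️⃣ Next Steps ✅\n(.*?)(?:\n# |\Z)", experiment_md, re.S)
    if not m:
        return []
    tasks = []
    for done, text in re.findall(r"- \[([ xX])\] (.+)", m.group(1)):
        tasks.append((done.lower() == "x", text.strip()))
    return tasks
```
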
### Adding Raw Data

Place your raw data files in the `Data/EXP-XXXX/raw/` directory and list them in the "Raw Data & Resources" section of the experiment file.

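Each recorded file also gets an entry under `data:` in the experiment YAML (path, type, sha256, date; see the template diff below). A sketch of how a `record_data`-style helper might build that entry:

```python
# Sketch of a record_data-style helper: hash a raw data file and build the
# entry appended under `data:`. Illustrative only.
import hashlib
from datetime import date

def make_data_entry(path: str, data_type: str) -> dict:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return {
        "path": path,
        "type": data_type,
        "sha256": h.hexdigest(),
        "added": date.today().isoformat(),
    }
```
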
## Integration with Agent Runner

To integrate this functionality with your existing setup:

1. Add the functions from `Agent/experiment_handler_patch.py` to your `agent_runner.py` file
2. Update your function-definition list to include the new multiblock experiment functions
3. Ensure the required imports (`os`, `re`, `datetime`) are at the top of the file

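Assuming `agent_runner.py` registers tools with OpenAI-style function definitions (an assumption on my part; the actual schema may differ), the new entry might look roughly like:

```python
# Hypothetical function-definition entry for the agent's tool list.
MULTIBLOCK_FUNCTIONS = [
    {
        "name": "create_multiblock_experiment",
        "description": "Create a multiblock Markdown experiment from the template",
        "parameters": {
            "type": "object",
            "properties": {
                "exp_id": {"type": "string", "description": "e.g. EXP-0226"},
                "title": {"type": "string"},
            },
            "required": ["exp_id", "title"],
        },
    },
]
```
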
## Example Workflow

1. **Create experiment**: "Create a multiblock experiment for CRISPR knockout of gene X in HEK293T cells"
2. **Update progress**: "Update experiment EXP-0226 with Day 1 results: Transfection efficiency 85%"
3. **Check tasks**: "Mark tasks 1 and 2 as complete in experiment EXP-0226"
4. **Add data**: "Record that I've added qPCR data in Data/EXP-0226/raw/knockout_validation.xlsx"
5. **Complete experiment**: "Mark experiment EXP-0226 as completed with interpretation: Successful knockout with 95% efficiency"

## Customization

You can modify the template at `Templates/experiment_multiblock.md` to adjust the sections or add new ones specific to your lab's needs.
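Because the format stacks several `---`-delimited YAML blocks ahead of the Markdown body, any tooling that reads these files has to split the blocks apart first. One way this could be done (a sketch; assumes PyYAML is available):

```python
# Sketch: split a multiblock experiment file into its YAML frontmatter blocks
# and the trailing Markdown body.
import re
import yaml

def split_multiblock(text: str) -> tuple[list[dict], str]:
    blocks = []
    pattern = re.compile(r"\A---\n(.*?)\n---\n", re.S)
    # Peel off leading "---\n...\n---" blocks one at a time
    while (m := pattern.match(text)):
        blocks.append(yaml.safe_load(m.group(1)))
        text = text[m.end():].lstrip("\n")
    return blocks, text
```
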
@@ -8,6 +8,7 @@ This file serves as a robot table of contents for the repository. It lists the s

- **ENVIRONMENT_SETUP.md**: Guide for setting up the development environment, including GitHub CLI and OpenAI API key configuration.
- **ISSUES_LOG.md**: Logs all GitHub issues created, automatically updated by the Lab Agent.
- **LAB_AGENT_GUIDE.md**: Detailed guide on how to use the Lab Agent, including examples and troubleshooting.
- **README-multiblock-experiments.md**: Documentation for the multiblock Markdown experiment format.
- **README.md**: Main repository README with quick-start instructions and overview.
- **TASKS.md**: Tracks ongoing lab and development tasks, automatically updated by the Lab Agent.
- **branching_explainer.md**: No description available.
@@ -18,14 +19,14 @@

## Additional Directories

- **Agent/**: Contains the code for the AI agent integration, including the task-runner and hooks.
- **Aims/**: Directory containing repository files.
- **Analysis/**: Directory containing repository files.
- **Cell-prep-forms/**: Directory containing repository files.
- **Data/**: Storage for data outputs or references to data.
- **Experiments/**: Records of individual experiments or lab sessions.
- **Templates/**: Contains starter templates for various YAML structures.
- **agent-case-studies/**: Directory containing repository files.
- **cursor_env/**: Directory containing repository files.
- **protocols/**: Directory containing repository files.

---
TASKS.md (20 lines changed)
@@ -24,10 +24,30 @@ _Note: This file is now automatically updated by the agent/task-runner. Tasks ar
- [ ] Day 4 (May 9, 2025): Synthesize cDNA and perform qPCR
- [ ] Submit final data to repository and link to experimental record

### YBX1-CEBPA Co-Immunoprecipitation (EXP-0226)
- [ ] Day 1 (May 8, 2025): Set up 3T3 cells
  - [ ] Seed 3T3 cells in six 10 cm dishes
  - [ ] Prepare stock solutions for adipogenic induction
- [ ] Day 2 (May 9, 2025): Begin adipogenic treatment
  - [ ] Prepare fresh adipogenic induction medium
  - [ ] Treat cells with control or adipogenic medium
- [ ] Day 3 (May 10, 2025): Harvest and process cells
  - [ ] Collect cell lysates from all conditions
  - [ ] Measure protein concentrations
  - [ ] Set up antibody incubation for co-immunoprecipitation
- [ ] Day 4 (May 11, 2025): Complete Co-IP and Western blotting
  - [ ] Process IP samples with protein A/G beads
  - [ ] Run SDS-PAGE gels and transfer to membranes
  - [ ] Blot for protein interactions
  - [ ] Image and quantify results
- [ ] Analyze protein interaction data and prepare figures

### Materials and Resources
- [ ] Confirm Actinomycin D stock availability
- [ ] Ensure sufficient RNA extraction reagents are available
- [ ] Check qPCR primer stocks for all target genes
- [ ] Order YBX1 and C/EBPα antibodies for Co-IP
- [ ] Verify Invitrogen gel and transfer system availability

## Development Tasks
- [ ] Reorganize repository structure: convert "projects" to "aims" and "subprojects" to "projects" for better GitHub integration
Templates/experiment_multiblock.md (new file, 127 lines)
@@ -0,0 +1,127 @@
---
# EXPERIMENT METADATA
experiment_id: EXP-XXXX
title: "EXPERIMENT_TITLE"
date: YYYY-MM-DD
researchers:
  - RESEARCHER1
  - RESEARCHER2
protocol_id: PROT-XXXX
protocol_name: "PROTOCOL_NAME"
status: planned # planned | in-progress | completed | failed
aim: "Brief description of experimental aim"
project: "PROJECT_NAME"
---

---
# SAMPLE METADATA
cell_lines:
  - name: "CELL_LINE1"
    media: "MEDIA_TYPE"
    passage: "PASSAGE_NUMBER"
  - name: "CELL_LINE2"
    media: "MEDIA_TYPE"
    passage: "PASSAGE_NUMBER"
plate_format: "24-well" # 6-well | 24-well | 96-well | etc.
condition_map: |
  A1-A6: CELL_LINE1 + TREATMENT1
  B1-B6: CELL_LINE1 + TREATMENT2
  C1-C6: CELL_LINE2 + TREATMENT1
  D1-D6: CELL_LINE2 + TREATMENT2
replicates: 6
---

---
# REAGENTS & INSTRUMENT SETTINGS
transfection:
  reagent: "TRANSFECTION_REAGENT"
  volume_per_well: "XX µL"
  complex_volume: "XX µL"
  incubation_time: "XX min"
treatments:
  - name: "TREATMENT1"
    concentration: "XX µM/nM"
    duration: "XX h"
  - name: "TREATMENT2"
    concentration: "XX µM/nM"
    duration: "XX h"
timepoints:
  - "0h"
  - "Xh"
  - "Xh"
instruments:
  - name: "INSTRUMENT_NAME"
    settings: "RELEVANT_SETTINGS"
---

# 1️⃣ Experiment Timeline & Execution

## Day 1: YYYY-MM-DD
- [ ] Task 1: DESCRIPTION
- [ ] Task 2: DESCRIPTION

## Day 2: YYYY-MM-DD
- [ ] Task 1: DESCRIPTION
- [ ] Task 2: DESCRIPTION

## Day 3: YYYY-MM-DD
- [ ] Task 1: DESCRIPTION
- [ ] Task 2: DESCRIPTION

# 2️⃣ Raw Data & Resources
_Place files in `Data/{{experiment_id}}/raw/` and list/link them here._

| Filename | Description | Date Added |
|----------|-------------|------------|
| `filename1.xlsx` | DESCRIPTION | YYYY-MM-DD |
| `filename2.csv` | DESCRIPTION | YYYY-MM-DD |
| `image1.png` | DESCRIPTION | YYYY-MM-DD |

# 3️⃣ Results & Analysis

## QC Metrics
_Add quality control results, RNA integrity, transfection efficiency, etc._

## Primary Results
_Add tables, plots, or images of key results._

```markdown
# Insert code blocks, tables, or embed plots here
```

## Analysis Notes
_Add notes about analysis methods, tools used, etc._

Analysis script: `Analysis/{{experiment_id}}_analysis.R`

# 4️⃣ Interpretation

## Summary of Findings
_Provide a concise summary of key findings (2-3 paragraphs)._

## Challenges & Limitations
_Note any issues encountered or limitations of the experiment._

## Relation to Project Goals
_Explain how these results contribute to the larger project._

# 5️⃣ Next Steps ✅
_Check boxes when complete. These can auto-update TASKS.md._

- [ ] Follow-up experiment: DESCRIPTION
- [ ] Additional analysis: DESCRIPTION
- [ ] Present results at lab meeting on YYYY-MM-DD
- [ ] Update protocol based on findings
- [ ] Other: DESCRIPTION

# 6️⃣ Team Discussion
_Use this section for team comments, suggestions, and feedback._

> **RESEARCHER1 (YYYY-MM-DD):** Comment text here.

> **RESEARCHER2 (YYYY-MM-DD):** Comment text here.

# 7️⃣ References & Related Experiments
- Related protocol: [PROTOCOL_NAME](Protocols/protocol_file.yaml)
- Previous experiment: [EXP-XXXX](Experiments/experiment_file.md)
- Literature: CITATION
@@ -1,17 +1,39 @@
-experiment_id: EXP-XXXX
-project: Example Project Name
-title: Example Experiment Title
-date: YYYY-MM-DD
-researcher: Your Name
-protocol: Example Protocol Name (v1.0)
-materials:
-  Material: Example Material (lot #)
-parameters:
-  Parameter1: value
-  Parameter2: value
-results:
-  images: ["Data/Images/example_image1.png"]
-  observations: "Example observations."
-status: planned
+experiment_id: EXP-YYYYMMDD
+aim: Brief description of experiment aim
+project: Project_Name
+researcher: username
+status: in_progress
+created: YYYY-MM-DD
+
+plate:
+  id: PLATE_ID
+  layout:
+    A1: {gene: "Example1", perturbation: "siRNA", day: 0}
+    A2: {gene: "Example2", perturbation: "siRNA", day: 0}
+    A3: {gene: "NTC", perturbation: "control", day: 0}
+
+tasks: # one row per GitHub Issue
+  - id: 123 # GitHub Issue number
+    title: "Seed cells on plate"
+    status: open
+  - id: 124
+    title: "Transfect siRNA"
+    status: open
+
+sample_preparation:
+  method: Sample preparation method
+  date: YYYY-MM-DD
+
+downstream_application:
+  assay_type: qPCR
+  targets: ["Gene1", "Gene2", "Control"]
+  date: YYYY-MM-DD
+
+data: # added automatically by record_data()
+  - path: Data/EXP-YYYYMMDD/example_data.csv
+    type: qPCR
+    sha256: abcdef1234567890
+    added: YYYY-MM-DD
+
+notes: |
+  Additional notes or deviations from protocol.
protocols/adipogenic_induction_treatment_v1.yaml (new file, 127 lines)
@@ -0,0 +1,127 @@
---
# Protocol metadata
id: PROT-0036
name: Adipogenic Induction Treatment
version: 1.0
description: Protocol for inducing adipogenesis in preadipocyte cells using a combination of IBMX, dexamethasone, and insulin
author: James M. Jordan
created: 2025-05-07
last_updated: 2025-05-07
category: cell-treatment

# Materials required
materials:
  - name: 3-Isobutyl-1-methylxanthine (IBMX)
    concentration: 0.5 mM final
    storage: -20°C
    preparation: Dissolve in DMSO to make 500X stock (250 mM)
    supplier: Sigma-Aldrich (I5879)
  - name: Dexamethasone
    concentration: 1 µM final
    storage: -20°C
    preparation: Dissolve in ethanol to make 1000X stock (1 mM)
    supplier: Sigma-Aldrich (D4902)
  - name: Insulin
    concentration: 10 µg/mL final
    storage: -20°C
    preparation: Dissolve in acidified water (pH 4.5) to make 1000X stock (10 mg/mL)
    supplier: Sigma-Aldrich (I6634)
  - name: DMEM high glucose
    storage: 4°C
    supplier: Gibco
  - name: Fetal Bovine Serum (FBS)
    concentration: 10% final
    storage: -20°C (aliquots)
    supplier: Gibco
  - name: Penicillin-Streptomycin
    concentration: 1% final
    storage: -20°C
    supplier: Gibco
  - name: Complete growth medium
    composition: DMEM + 10% FBS + 1% Pen-Strep
    storage: 4°C

# Equipment required
equipment:
  - name: Biosafety cabinet
    certification: Class II
  - name: CO2 incubator
    settings: 37°C, 5% CO2, humidified
  - name: Water bath
    settings: 37°C
  - name: Serological pipettes
    sizes: 5 mL, 10 mL, 25 mL
  - name: Micropipettes
    sizes: P1000, P200, P20

# Protocol steps
steps:
  - step: 1
    action: "Prepare complete growth medium"
    details: "To 500 mL DMEM high glucose, add 50 mL FBS and 5 mL Pen-Strep. Mix well and warm to 37°C before use."
  - step: 2
    action: "Thaw induction reagent stocks"
    details: "Remove IBMX, dexamethasone, and insulin stock solutions from -20°C and thaw at room temperature. Protect from light."
  - step: 3
    action: "Prepare adipogenic induction medium (AIM)"
    details: "To complete growth medium, add IBMX (final 0.5 mM), dexamethasone (final 1 µM), and insulin (final 10 µg/mL). Mix thoroughly but gently by inverting."
  - step: 4
    action: "Warm media"
    details: "Warm both complete growth medium (control) and adipogenic induction medium to 37°C before adding to cells."
  - step: 5
    action: "Aspirate existing medium from cells"
    details: "Using a sterile aspirator, carefully remove all existing medium from the cell culture vessel."
  - step: 6
    action: "Add fresh medium"
    details: "Add the appropriate volume of either complete growth medium (control) or adipogenic induction medium to the cells."
  - step: 7
    action: "Return cells to incubator"
    details: "Place cell culture vessels in a 37°C, 5% CO2 incubator."
  - step: 8
    action: "Maintain treatment"
    details: "For the standard protocol, maintain cells in adipogenic induction medium for 3 days, then switch to insulin-only medium (10 µg/mL insulin in complete medium) for an additional 4-11 days."

# Critical parameters
critical_parameters:
  - parameter: "Cell confluence"
    details: "Cells should be at 100% confluence at the time of induction. Post-confluent cells (2 days after reaching confluence) often yield better differentiation."
  - parameter: "Reagent concentration"
    details: "IBMX (0.5 mM), dexamethasone (1 µM), and insulin (10 µg/mL) concentrations are critical. Prepare fresh stocks if uncertain about stability."
  - parameter: "Media change frequency"
    details: "After the initial 3-day induction period, change to insulin-only medium and then change medium every 2-3 days for optimal differentiation."

# Troubleshooting
troubleshooting:
  - problem: "Poor differentiation"
    solution: "Ensure cells were 100% confluent before induction; check reagent quality and concentrations; extend the post-confluent period to 2 days before induction."
  - problem: "Cell detachment"
    solution: "Handle cells gently during media changes; ensure the plate surface is appropriate for adipocyte culture; consider using collagen-coated plates."
  - problem: "Contamination"
    solution: "Use sterile technique; check medium and reagents for contamination; consider adding additional antibiotics."

# Safety considerations
safety:
  ppe: "Lab coat, gloves, and eye protection required"
  hazards: "DMSO (IBMX solvent) can enhance skin penetration of other chemicals; dexamethasone is a synthetic glucocorticoid with potential health effects."
  disposal: "Dispose of media and solutions according to institutional guidelines for biological waste."

# Expected outcomes
expected_outcomes:
  - outcome: "3T3-L1 cells should begin showing lipid droplet formation within 3-5 days"
  - outcome: "Maximum differentiation typically reached by day 8-10"
  - outcome: "Adipogenic marker genes (PPARγ, C/EBPα, FABP4, etc.) upregulated within 1-2 days"
  - outcome: "Early adipogenic transcription factors (C/EBPβ, C/EBPδ) upregulated within hours"

# References
references:
  - "Zebisch K, et al. (2012) Protocol for effective differentiation of 3T3-L1 cells to adipocytes. Anal Biochem. 425(1):88-90."
  - "Green H, Kehinde O. (1975) An established preadipose cell line and its differentiation in culture II. Factors affecting the adipose conversion. Cell. 5(1):19-27."
  - "Rubin CS, et al. (1978) Development of hormone receptors and hormonal responsiveness in vitro. Insulin receptors and insulin sensitivity in the preadipocyte and adipocyte forms of 3T3-L1 cells. J Biol Chem. 253(20):7570-7578."

# Notes
notes: |
  - This protocol is optimized for 3T3-L1 cells but can be adapted for other preadipocyte cell lines or primary cells.
  - Cell response to adipogenic induction can vary between passages, so consistency in culture conditions is important.
  - For experiment termination at 24 h post-induction, cells will show only early adipogenic markers (C/EBPβ, C/EBPδ), not the mature adipocyte phenotype.
  - YBX1 has been reported to interact with C/EBPα during early adipogenesis as part of transcriptional regulation.
---
@@ -1,93 +0,0 @@
#!/usr/bin/env python3
"""
Protocol Format Checker

This script checks which YAML protocol files have been updated with the consistent professional format
and which ones still need to be fixed.
"""

import os
import yaml
import sys

def check_protocol_format(protocol_path):
    """Check if a protocol file has the expected professional format sections."""
    try:
        with open(protocol_path, 'r') as f:
            content = f.read()

        # If the file is empty or very small, it's probably not formatted properly
        if len(content) < 100:
            return False

        # Check for key sections that indicate our professional format
        required_sections = [
            "# Protocol metadata",
            "# Materials required",
            "# Equipment required",
            "# Protocol steps",
            "# Critical parameters",
            "last_updated:",
            "category:",
        ]

        # Check for the numbered-steps format
        step_format = "step: "

        # Count how many required sections are present
        section_count = 0
        has_step_format = False

        for section in required_sections:
            if section in content:
                section_count += 1

        if step_format in content:
            has_step_format = True

        # If it has most of the sections and the step format, consider it updated
        return section_count >= 5 and has_step_format

    except Exception as e:
        print(f"Error checking {protocol_path}: {e}")
        return False

def main():
    """Main function to check all protocol files."""
    protocol_dir = os.path.dirname(os.path.abspath(__file__))

    # Get all YAML files in the protocols directory
    protocol_files = []
    for root, _, files in os.walk(protocol_dir):
        for file in files:
            if file.endswith('.yaml'):
                protocol_files.append(os.path.join(root, file))

    # Check each protocol file
    updated = []
    need_update = []

    for protocol in protocol_files:
        is_updated = check_protocol_format(protocol)
        file_name = os.path.basename(protocol)

        if is_updated:
            updated.append(file_name)
        else:
            need_update.append(file_name)

    # Print the results as a checklist
    print("\n===== PROTOCOL FORMATTING CHECKLIST =====")
    print(f"Total protocols: {len(protocol_files)}")
    print(f"Updated protocols: {len(updated)} ✓")
    print(f"Protocols needing update: {len(need_update)} ✗\n")

    print("UPDATED PROTOCOLS:")
    for i, protocol in enumerate(sorted(updated), 1):
        print(f"  ✓ {i}. {protocol}")

    print("\nPROTOCOLS NEEDING UPDATE:")
    for i, protocol in enumerate(sorted(need_update), 1):
        print(f"  ✗ {i}. {protocol}")

if __name__ == "__main__":
    main()