Add PR action to validate notebook format (#1793)

This commit is contained in:
Brandon Baker 2025-04-28 03:33:02 -07:00 committed by GitHub
parent 8fd8b9bfbb
commit 3c2a4de1ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 124 additions and 39 deletions

61
.github/scripts/check_notebooks.py vendored Normal file
View File

@ -0,0 +1,61 @@
import subprocess
import sys
from pathlib import Path
import nbformat
def get_changed_notebooks(base_ref: str = "origin/main") -> list[Path]:
    """
    Return the notebook paths that differ from the given base reference.

    Runs ``git diff --name-only -z <base_ref> -- *.ipynb`` in the current
    working directory and converts each reported file name to a ``Path``.
    Deleted notebooks are still listed by git, so callers that need the file
    on disk must check for existence themselves.

    Args:
        base_ref: Git revision to diff against.

    Returns:
        One ``Path`` per changed ``.ipynb`` file, in the order git reports
        them. Empty when nothing changed.

    Raises:
        subprocess.CalledProcessError: If ``git diff`` exits with a non-zero
            status (for example, when ``base_ref`` cannot be resolved).
    """
    result = subprocess.run(
        # -z makes git emit NUL-terminated, *unquoted* path names. Without it,
        # paths containing non-ASCII or special characters are C-quoted
        # (e.g. "caf\303\251.ipynb"), which never matches a file on disk and
        # would cause such notebooks to be silently skipped.
        ["git", "diff", "--name-only", "-z", base_ref, "--", "*.ipynb"],
        capture_output=True,
        text=True,
        check=True,
    )
    return [Path(name) for name in result.stdout.split("\0") if name]
def is_valid_notebook(path: Path) -> bool:
    """
    Check whether the notebook at ``path`` parses and passes schema validation.

    The file is read as UTF-8, loaded with ``nbformat`` as a version 4
    notebook, and then validated against the notebook JSON schema. Any failure
    is reported on stdout as ``<path>: INVALID - <reason>``.

    Args:
        path: Location of the ``.ipynb`` file to check.

    Returns:
        ``True`` if the notebook could be read and validated, ``False`` if any
        step raised (missing file, malformed JSON, schema violation, ...).
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            notebook = nbformat.read(f, as_version=4)
        # nbformat.read() only *logs* schema violations instead of raising,
        # so validate explicitly to make schema errors fail the check.
        nbformat.validate(notebook)
    except Exception as e:
        # Deliberately broad: every failure mode means "not a valid notebook"
        # and is reported to the user rather than aborting the whole run.
        print(f"{path}: INVALID - {e}")
        return False
    return True
def main() -> None:
    """
    Validate every notebook changed on the current branch.

    Exits with status 0 when there is nothing to validate, exits with
    status 1 when at least one changed notebook is invalid, and returns
    normally when all changed notebooks are valid.
    """
    notebooks = get_changed_notebooks()
    if not notebooks:
        print("No changed .ipynb files to validate.")
        sys.exit(0)

    print(f"Validating {len(notebooks)} notebook(s)...")

    # Paths that no longer exist were deleted by the change; only files still
    # on disk are validated.
    invalid = [
        notebook
        for notebook in notebooks
        if notebook.exists() and not is_valid_notebook(notebook)
    ]
    errors = len(invalid)

    if not errors:
        print("All changed notebooks are valid.")
        return

    print(f"{errors} invalid notebook(s) found.")
    sys.exit(1)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,25 @@
# Runs on every pull request and checks that each changed .ipynb file
# can be read by nbformat.
name: Validate Changed Notebooks

on: [pull_request]

jobs:
  validate-notebooks:
    name: Validate Notebooks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        # v4 runs on Node 20; v3 used the deprecated Node 16 runtime.
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # needed for git diff to work
      - name: Set up Python
        # v5 runs on Node 20; v4 used the deprecated Node 16 runtime.
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Install dependencies
        run: pip install nbformat
      - name: Validate changed .ipynb files
        run: python .github/scripts/check_notebooks.py

3
.gitignore vendored
View File

@ -140,3 +140,6 @@ examples/fine-tuned_qa/local_cache/*
# PyCharm files
.idea/
# VS Code files
.vscode/

View File

@ -41,10 +41,6 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mDEPRECATION: textract 1.6.5 has a non-standard dependency specifier extract-msg<=0.29.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of textract or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
@ -392,17 +388,17 @@
"text": [
"\tGroundtruth\n",
"\tTrue\tFalse\tNEE\n",
"True\t12\t4\t16\t\n",
"False\t0\t4\t3\t\n",
"NEE\t6\t2\t3\t\n"
"True\t9\t3\t15\t\n",
"False\t0\t3\t2\t\n",
"NEE\t8\t6\t4\t\n"
]
},
{
"data": {
"text/plain": [
"{'True': {'True': 12, 'False': 4, 'NEE': 16},\n",
" 'False': {'True': 0, 'False': 4, 'NEE': 3},\n",
" 'NEE': {'True': 6, 'False': 2, 'NEE': 3}}"
"{'True': {'True': 9, 'False': 3, 'NEE': 15},\n",
" 'False': {'True': 0, 'False': 3, 'NEE': 2},\n",
" 'NEE': {'True': 8, 'False': 6, 'NEE': 4}}"
]
},
"execution_count": 10,
@ -683,17 +679,17 @@
"text": [
"\tGroundtruth\n",
"\tTrue\tFalse\tNEE\n",
"True\t13\t0\t3\t\n",
"False\t0\t9\t3\t\n",
"NEE\t5\t1\t16\t\n"
"True\t13\t1\t4\t\n",
"False\t1\t10\t2\t\n",
"NEE\t3\t1\t15\t\n"
]
},
{
"data": {
"text/plain": [
"{'True': {'True': 13, 'False': 0, 'NEE': 3},\n",
" 'False': {'True': 0, 'False': 9, 'NEE': 3},\n",
" 'NEE': {'True': 5, 'False': 1, 'NEE': 16}}"
"{'True': {'True': 13, 'False': 1, 'NEE': 4},\n",
" 'False': {'True': 1, 'False': 10, 'NEE': 2},\n",
" 'NEE': {'True': 3, 'False': 1, 'NEE': 15}}"
]
},
"execution_count": 16,
@ -774,17 +770,17 @@
"text": [
"\tGroundtruth\n",
"\tTrue\tFalse\tNEE\n",
"True\t6\t0\t3\t\n",
"False\t0\t3\t0\t\n",
"NEE\t12\t7\t19\t\n"
"True\t9\t0\t1\t\n",
"False\t0\t7\t0\t\n",
"NEE\t8\t5\t20\t\n"
]
},
{
"data": {
"text/plain": [
"{'True': {'True': 6, 'False': 0, 'NEE': 3},\n",
" 'False': {'True': 0, 'False': 3, 'NEE': 0},\n",
" 'NEE': {'True': 12, 'False': 7, 'NEE': 19}}"
"{'True': {'True': 9, 'False': 0, 'NEE': 1},\n",
" 'False': {'True': 0, 'False': 7, 'NEE': 0},\n",
" 'NEE': {'True': 8, 'False': 5, 'NEE': 20}}"
]
},
"execution_count": 19,
@ -931,17 +927,17 @@
"text": [
"\tGroundtruth\n",
"\tTrue\tFalse\tNEE\n",
"True\t11\t0\t5\t\n",
"False\t0\t8\t1\t\n",
"NEE\t7\t2\t16\t\n"
"True\t13\t0\t3\t\n",
"False\t1\t10\t1\t\n",
"NEE\t3\t2\t17\t\n"
]
},
{
"data": {
"text/plain": [
"{'True': {'True': 11, 'False': 0, 'NEE': 5},\n",
" 'False': {'True': 0, 'False': 8, 'NEE': 1},\n",
" 'NEE': {'True': 7, 'False': 2, 'NEE': 16}}"
"{'True': {'True': 13, 'False': 0, 'NEE': 3},\n",
" 'False': {'True': 1, 'False': 10, 'NEE': 1},\n",
" 'NEE': {'True': 3, 'False': 2, 'NEE': 17}}"
]
},
"execution_count": 23,

View File

@ -694,7 +694,7 @@
date: 2025-04-23
authors:
- atroyn
- brandonbaker
- brandonbaker-openai
tags:
- embeddings
- completions