diff --git a/examples/evaluation/How_to_evaluate_LLMs_for_SQL_generation.ipynb b/examples/evaluation/How_to_evaluate_LLMs_for_SQL_generation.ipynb index 7b0ede1..de568db 100644 --- a/examples/evaluation/How_to_evaluate_LLMs_for_SQL_generation.ipynb +++ b/examples/evaluation/How_to_evaluate_LLMs_for_SQL_generation.ipynb @@ -7,9 +7,9 @@ "source": [ "# How to test and evaluate LLMs for SQL generation\n", "\n", - "LLMs are fundamentatlly non-deterministic in their responses, this attribute makes them wonderfully creative and dynamic in their responses. However, this trait poses significant challenges in achieving consistency, a crucial aspect for integrating LLMs into production environments.\n", + "LLMs are fundamentally non-deterministic in their responses; this attribute makes them wonderfully creative and dynamic in their responses. However, this trait poses significant challenges in achieving consistency, a crucial aspect for integrating LLMs into production environments.\n", "\n", - "The key to harnessing the potential of LLMs in practical applications lies in consistent and systematic evaluation. This enables the identification and rectification of inconsistencies and helps in monitoring progress over time as the application evolves.\n", + "The key to harnessing the potential of LLMs in practical applications lies in consistent and systematic evaluation. 
This enables the identification and rectification of inconsistencies and helps with monitoring progress over time as the application evolves.\n", "\n", "## Scope of this notebook\n", "\n", @@ -49,7 +49,18 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 1, + "id": "c7f325fc", + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment this to install all necessary dependencies\n", + "# !pip install openai datasets pandas pydantic matplotlib python-dotenv numpy tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, "id": "245fcedb", "metadata": {}, "outputs": [ @@ -57,7 +68,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "cannot find .env file\n" + "78577 rows\n" ] } ], @@ -73,13 +84,17 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from dotenv import load_dotenv\n", + "from tqdm.notebook import tqdm\n", + "from IPython.display import HTML, display\n", "\n", "# Loads key from local .env file to setup API KEY in env variables\n", "%reload_ext dotenv\n", "%dotenv\n", " \n", "GPT_MODEL = 'gpt-4o'\n", - "dataset = load_dataset(\"b-mc2/sql-create-context\")" + "dataset = load_dataset(\"b-mc2/sql-create-context\")\n", + "\n", + "print(dataset['train'].num_rows, \"rows\")" ] }, { @@ -102,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 3, "id": "f8027115", "metadata": {}, "outputs": [ @@ -190,7 +205,7 @@ "4 CREATE TABLE department (num_employees INTEGER... 
" ] }, - "execution_count": 151, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -205,10 +220,9 @@ "id": "b04cb5eb", "metadata": {}, "source": [ - "\n", "## Test development\n", "\n", - "To test to output of the LLM generations, we'll develop two unit tests and an evaluation, which will combine to give us a basic evaluation framework to grade the quality of our LLM iterations.\n", + "To test the output of the LLM generations, we'll develop two unit tests and an evaluation, which will combine to give us a basic evaluation framework to grade the quality of our LLM iterations.\n", "\n", "To re-iterate, our purpose is to measure the correctness and consistency of LLM output given our questions.\n", "\n", @@ -223,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 4, "id": "eb811101", "metadata": {}, "outputs": [], @@ -232,7 +246,10 @@ "\n", "\n", "class LLMResponse(BaseModel):\n", - " \"\"\"This simple Class expects to receive a JSON string that can be parsed into a `create` and `select` statement.\"\"\"\n", + " \"\"\"This is the structure that we expect the LLM to respond with.\n", + "\n", + " The LLM should respond with a JSON string with `create` and `select` fields.\n", + " \"\"\"\n", " create: str\n", " select: str" ] @@ -244,12 +261,12 @@ "source": [ "#### Prompt\n", "\n", - "For this demonstration purposes, we use a fairly simple prompt requesting GPT to generate a pair of context CREATE SQL and a answering SELECT SQL query. We supply the natural language question as part of the prompt. We request the response to be in JSON format, so that it can be parsed easily." + "For demonstration purposes, we use a fairly simple prompt requesting GPT to generate a `(context, answer)` pair. `context` is the `CREATE` SQL statement, and `answer` is the `SELECT` SQL statement. We supply the natural language question as part of the prompt. 
We request the response to be in JSON format, so that it can be parsed easily." ] }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 5, "id": "c2be3ba4", "metadata": {}, "outputs": [ @@ -269,12 +286,12 @@ "system_prompt = '''Translate this natural language request into a JSON object containing two SQL queries. \n", "The first query should be a CREATE statement for a table answering the user's request, while the second should be a SELECT query answering their question.'''\n", "\n", - "pprint(system_prompt)\n" + "pprint(system_prompt)" ] }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 6, "id": "3a20d712", "metadata": {}, "outputs": [ @@ -298,13 +315,13 @@ "\n", "messages = []\n", "messages.append({\"role\": \"system\", \"content\": system_prompt})\n", - "messages.append({\"role\":\"user\",\"content\": sql_df.iloc[0]['question']})\n", + "messages.append({\"role\":\"user\", \"content\": sql_df.iloc[0]['question']})\n", "pprint(messages)" ] }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 7, "id": "38b704b3-6f0e-4708-bc70-96723d69da6f", "metadata": {}, "outputs": [], @@ -312,9 +329,6 @@ "# Sending the message array to GPT, requesting a response (ensure that you have API key loaded to Env for this step)\n", "\n", "client = OpenAI()\n", - "# completion = client.chat.completions.create(model = GPT_MODEL, messages = messages)\n", - "\n", - "\n", "completion = client.beta.chat.completions.parse(\n", " model=GPT_MODEL,\n", " messages=messages,\n", @@ -336,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 8, "id": "2b057391-4f83-4b5a-8843-a9ee74bee871", "metadata": {}, "outputs": [ @@ -344,10 +358,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "('{\"create\":\"CREATE TABLE department_heads (\\\\n id INT PRIMARY KEY,\\\\n '\n", - " 'name VARCHAR(255),\\\\n age INT,\\\\n department '\n", - " 'VARCHAR(255)\\\\n);\",\"select\":\"SELECT COUNT(*) FROM 
department_heads WHERE age '\n", - " '> 56;\"}')\n" + "{\"create\":\"CREATE TABLE department_heads (\\n id INT PRIMARY KEY,\\n name VARCHAR(100),\\n age INT\\n);\",\"select\":\"SELECT COUNT(*) FROM department_heads WHERE age > 56;\"}\n" ] } ], @@ -355,7 +366,7 @@ "# Viewing the output from GPT\n", "\n", "content = completion.choices[0].message.content\n", - "pprint(content)" + "print(content)" ] }, { @@ -370,7 +381,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 9, "id": "4c7133f1-74d6-43f1-9443-09a3f8308c35", "metadata": {}, "outputs": [], @@ -388,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 10, "id": "6a9a9128", "metadata": {}, "outputs": [ @@ -398,7 +409,7 @@ "True" ] }, - "execution_count": 158, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -419,7 +430,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 11, "id": "a0a26690", "metadata": {}, "outputs": [ @@ -429,7 +440,7 @@ "text": [ "ERROR: Invalid schema: 1 validation error for LLMResponse\n", " Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='CREATE departments, select * from departments', input_type=str]\n", - " For further information visit https://errors.pydantic.dev/2.8/v/json_invalid\n" + " For further information visit https://errors.pydantic.dev/2.10/v/json_invalid\n" ] }, { @@ -438,7 +449,7 @@ "False" ] }, - "execution_count": 159, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -475,7 +486,7 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 12, "id": "9cc95481", "metadata": {}, "outputs": [], @@ -518,51 +529,54 @@ }, { "cell_type": "code", - "execution_count": 161, + "execution_count": 13, "id": "c6d2573d", "metadata": {}, "outputs": [], "source": [ - "def test_select(conn, cursor, select):\n", + "def test_select(conn, cursor, select, should_log=True):\n", " \"\"\"Tests that a SQLite 
select query can be executed successfully.\"\"\"\n", " try:\n", - " print(f\"Testing select query: {select}\")\n", + " if should_log:\n", + " print(f\"Testing select query: {select}\")\n", " cursor.execute(select)\n", " record = cursor.fetchall()\n", - " print(record)\n", + " if should_log:\n", + " print(f\"Result of query: {record}\")\n", "\n", " return True\n", "\n", " except sqlite3.Error as error:\n", - " print(\"Error while executing select query:\", error)\n", - "\n", + " if should_log:\n", + " print(\"Error while executing select query:\", error)\n", " return False\n", "\n", "\n", - "def test_create(conn, cursor, create):\n", + "def test_create(conn, cursor, create, should_log=True):\n", " \"\"\"Tests that a SQLite create query can be executed successfully\"\"\"\n", " try:\n", - " print(f\"Testing create query: {create}\")\n", + " if should_log:\n", + " print(f\"Testing create query: {create}\")\n", " cursor.execute(create)\n", " conn.commit()\n", "\n", " return True\n", "\n", " except sqlite3.Error as error:\n", - " print(\"Error while creating the SQLite table:\", error)\n", - "\n", + " if should_log:\n", + " print(\"Error while creating the SQLite table:\", error)\n", " return False\n", "\n", "\n", - "def test_llm_sql(LLMResponse):\n", + "def test_llm_sql(llm_response, should_log=True):\n", " \"\"\"Runs a suite of SQLite tests\"\"\"\n", " try:\n", " conn = create_connection()\n", " cursor = conn.cursor()\n", "\n", - " create_response = test_create(conn, cursor, LLMResponse.create)\n", + " create_response = test_create(conn, cursor, llm_response.create, should_log=should_log)\n", "\n", - " select_response = test_select(conn, cursor, LLMResponse.select)\n", + " select_response = test_select(conn, cursor, llm_response.select, should_log=should_log)\n", "\n", " if conn:\n", " close_connection(conn)\n", @@ -577,14 +591,14 @@ " return True\n", "\n", " except sqlite3.Error as error:\n", - " print(\"Error while creating a sqlite table\", error)\n", - "\n", + " if 
should_log:\n", + " print(\"Error while creating a sqlite table\", error)\n", " return False" ] }, { "cell_type": "code", - "execution_count": 162, + "execution_count": 14, "id": "a9266753-4646-4901-bc14-632d3bf47aaa", "metadata": {}, "outputs": [ @@ -594,9 +608,8 @@ "text": [ "CREATE SQL is: CREATE TABLE department_heads (\n", " id INT PRIMARY KEY,\n", - " name VARCHAR(255),\n", - " age INT,\n", - " department VARCHAR(255)\n", + " name VARCHAR(100),\n", + " age INT\n", ");\n", "SELECT SQL is: SELECT COUNT(*) FROM department_heads WHERE age > 56;\n" ] @@ -612,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": 163, + "execution_count": 15, "id": "83bc1f1b", "metadata": {}, "outputs": [ @@ -622,12 +635,11 @@ "text": [ "Testing create query: CREATE TABLE department_heads (\n", " id INT PRIMARY KEY,\n", - " name VARCHAR(255),\n", - " age INT,\n", - " department VARCHAR(255)\n", + " name VARCHAR(100),\n", + " age INT\n", ");\n", "Testing select query: SELECT COUNT(*) FROM department_heads WHERE age > 56;\n", - "[(0,)]\n" + "Result of query: [(0,)]\n" ] }, { @@ -636,7 +648,7 @@ "True" ] }, - "execution_count": 163, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -649,7 +661,7 @@ }, { "cell_type": "code", - "execution_count": 164, + "execution_count": 16, "id": "589c7cc7", "metadata": {}, "outputs": [ @@ -668,7 +680,7 @@ "False" ] }, - "execution_count": 164, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -688,14 +700,14 @@ "source": [ "### Evaluation\n", "\n", - "The last component is to **evaluate** whether the generate SQL actually answers the user's question. This test will be performed by `gpt-4o-mini`, and will assess how **relevant** the produced SQL query is when compared to the initial user request.\n", + "The last component is to **evaluate** whether the generated SQL actually answers the user's question. 
This test will be performed by `gpt-4o-mini`, and will assess how **relevant** the produced SQL query is when compared to the initial user request.\n", "\n", "This is a simple example which adapts an approach outlined in the [G-Eval paper](https://arxiv.org/abs/2303.16634), and tested in one of our other [cookbooks](https://github.com/openai/openai-cookbook/blob/main/examples/evaluation/How_to_eval_abstractive_summarization.ipynb)." ] }, { "cell_type": "code", - "execution_count": 165, + "execution_count": 17, "id": "029c8426", "metadata": {}, "outputs": [], @@ -748,7 +760,7 @@ }, { "cell_type": "code", - "execution_count": 166, + "execution_count": 18, "id": "85cfb78d", "metadata": {}, "outputs": [], @@ -779,7 +791,7 @@ }, { "cell_type": "code", - "execution_count": 167, + "execution_count": 19, "id": "607ee304", "metadata": {}, "outputs": [], @@ -789,15 +801,18 @@ "evaluation_results = []\n", "\n", "for x,y in sql_df.head(3).iterrows():\n", - " \n", - " score = get_geval_score(RELEVANCY_SCORE_CRITERIA,RELEVANCY_SCORE_STEPS,y['question'],y['context'] + '\\n' + y['answer'],'relevancy')\n", - " \n", + " score = get_geval_score(\n", + " RELEVANCY_SCORE_CRITERIA,\n", + " RELEVANCY_SCORE_STEPS,\n", + " y['question'],\n", + " y['context'] + '\\n' + y['answer'],'relevancy'\n", + " )\n", " evaluation_results.append((y['question'],y['context'] + '\\n' + y['answer'],score))" ] }, { "cell_type": "code", - "execution_count": 168, + "execution_count": 20, "id": "bd1002c2", "metadata": {}, "outputs": [ @@ -866,32 +881,22 @@ }, { "cell_type": "code", - "execution_count": 169, + "execution_count": 21, "id": "85c44a17", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('Translate this natural language request into a JSON object containing two SQL queries. \\n'\n", - " \"The first query should be a CREATE statement for a table answering the user's request, while the second should be a \"\n", - " 'SELECT query answering their question. 
\\n')\n" - ] - } - ], + "outputs": [], "source": [ "# Set first system prompt\n", - "system_prompt = \"\"\"Translate this natural language request into a JSON object containing two SQL queries. \n", - "The first query should be a CREATE statement for a table answering the user's request, while the second should be a SELECT query answering their question. \n", - "\"\"\"\n", + "system_prompt = \"\"\"Translate this natural language request into a JSON object containing two SQL queries.\n", "\n", - "pprint(system_prompt, width = 120)" + "The first query should be a CREATE statement for a table answering the user's request, while the second\n", + "should be a SELECT query answering their question. \n", + "\"\"\"" ] }, { "cell_type": "code", - "execution_count": 170, + "execution_count": 22, "id": "1244c44e", "metadata": {}, "outputs": [], @@ -899,15 +904,13 @@ "def get_response(system_prompt,user_message,model=GPT_MODEL):\n", " messages = []\n", " messages.append({\"role\": \"system\", \"content\": system_prompt})\n", - " messages.append({\"role\":\"user\",\"content\": user_message})\n", + " messages.append({\"role\": \"user\", \"content\": user_message})\n", "\n", " response = client.beta.chat.completions.parse(\n", " model=GPT_MODEL,\n", " messages=messages,\n", " response_format=LLMResponse,\n", " )\n", - "\n", - " \n", " # response = client.chat.completions.create(model=GPT_MODEL,messages=messages,temperature=0,response_format=LLMResponse)\n", " \n", " return response.choices[0].message.content" @@ -925,493 +928,79 @@ }, { "cell_type": "code", - "execution_count": 171, + "execution_count": 23, "id": "a98afa30", "metadata": {}, "outputs": [], "source": [ - "def execute_unit_tests(input_df,output_list,system_prompt):\n", + "def execute_unit_tests(input_df, output_list, system_prompt):\n", " \"\"\"Unit testing function that takes in a dataframe and appends test results to an output_list.\n", - " The system prompt is configurable to allow us to test a couple with this 
framework.\"\"\"\n", "\n", - " for x,y in input_df.iterrows():\n", - " model_response = get_response(system_prompt,y['question'])\n", + " This allows us to test multiple system prompts.\"\"\"\n", + "\n", + " for x, y in tqdm(input_df.iterrows(), total=len(input_df)):\n", + " model_response = get_response(system_prompt, y['question'])\n", "\n", " format_valid = test_valid_schema(model_response)\n", "\n", " try:\n", " test_query = LLMResponse.model_validate_json(model_response)\n", - " sql_valid = test_llm_sql(test_query)\n", - "\n", + " # Avoid logging since we're executing many rows at once\n", + " sql_valid = test_llm_sql(test_query, should_log=False)\n", " except:\n", " sql_valid = False\n", "\n", - " output_list.append((y['question'],model_response,format_valid,sql_valid))\n", + " output_list.append((y['question'], model_response, format_valid, sql_valid))\n", " \n", "def evaluate_row(row):\n", - " \"\"\"Simple evaluation function to categorize unit testing results. \n", + " \"\"\"Simple evaluation function to categorize unit testing results.\n", + " \n", " If the format or SQL are flagged it returns a label, otherwise it is correct\"\"\"\n", - " if row['format'] == False:\n", + " if row['format'] is False:\n", " return 'Format incorrect'\n", - " \n", - " elif row['sql'] == False:\n", + " elif row['sql'] is False:\n", " return 'SQL incorrect'\n", - " \n", " else:\n", " return 'SQL correct'" ] }, { "cell_type": "code", - "execution_count": 172, + "execution_count": 24, "id": "898e5069", "metadata": {}, "outputs": [], "source": [ - "# Select 100 unseen queries to test this one\n", + "# Select 50 unseen queries to test this one\n", "test_df = sql_df.tail(50)" ] }, { "cell_type": "code", - "execution_count": 173, + "execution_count": 25, "id": "2baec278", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Testing create query: CREATE TABLE cricket_partnerships (\n", - " id INT PRIMARY KEY,\n", - " player1 VARCHAR(50),\n", - 
" player2 VARCHAR(50),\n", - " venue VARCHAR(100),\n", - " match_date DATE\n", - ");\n", - "Testing select query: SELECT venue FROM cricket_partnerships WHERE player1 = 'Shoaib Malik' AND player2 = 'Misbah-ul-Haq' OR player1 = 'Misbah-ul-Haq' AND player2 = 'Shoaib Malik';\n", - "[]\n", - "Testing create query: CREATE TABLE CricketPartnerships (\n", - " id INT PRIMARY KEY,\n", - " player1 VARCHAR(255),\n", - " player2 VARCHAR(255),\n", - " venue VARCHAR(255),\n", - " date DATE,\n", - " runs_scored INT\n", - ");\n", - "Testing select query: SELECT venue FROM CricketPartnerships WHERE player1 = 'Herschelle Gibbs' AND player2 = 'Justin Kemp';\n", - "[]\n", - "Testing create query: CREATE TABLE PointsTable (\n", - " NumberPlayed INT,\n", - " Points INT\n", - ");\n", - "Testing select query: SELECT NumberPlayed FROM PointsTable WHERE Points = 310;\n", - "[]\n", - "Testing create query: CREATE TABLE sports_stats (\n", - " team_id INTEGER PRIMARY KEY,\n", - " team_name TEXT,\n", - " points_against INTEGER,\n", - " losing_bonus INTEGER\n", - ");\n", - "Testing select query: SELECT losing_bonus FROM sports_stats WHERE points_against = 588;\n", - "[]\n", - "Testing create query: CREATE TABLE rugby_points (\n", - " id SERIAL PRIMARY KEY,\n", - " team_name VARCHAR(100),\n", - " tries_against INT,\n", - " losing_bonus INT\n", - ");\n", - "Testing select query: SELECT * FROM rugby_points WHERE tries_against = 7 AND losing_bonus > 0;\n", - "[]\n", - "Testing create query: CREATE TABLE rugby_stats (\n", - " team_name VARCHAR(50),\n", - " games_played INT,\n", - " tries_scored INT,\n", - " try_bonus INT,\n", - " points_against INT\n", - ");\n", - "Testing select query: SELECT try_bonus FROM rugby_stats WHERE points_against = 488;\n", - "[]\n", - "Testing create query: CREATE TABLE Points (\n", - " id INT PRIMARY KEY,\n", - " description VARCHAR(255),\n", - " try_bonus INT\n", - ");\n", - "Testing select query: SELECT * FROM Points WHERE try_bonus = 140;\n", - "[]\n", - "Testing 
create query: CREATE TABLE Matches (\n", - " MatchID INT PRIMARY KEY,\n", - " TeamName VARCHAR(255),\n", - " Drawn INT,\n", - " TriesAgainst INT\n", - ");\n", - "Testing select query: SELECT TeamName FROM Matches WHERE Drawn = 1 AND TriesAgainst = 0;\n", - "[]\n", - "Testing create query: CREATE TABLE Champions (id INT PRIMARY KEY, name VARCHAR(100), reign_days INT, defenses INT);\n", - "Testing select query: SELECT reign_days FROM Champions WHERE reign_days > 3 AND defenses = 1;\n", - "[]\n", - "Testing create query: CREATE TABLE champions (\n", - " id INTEGER PRIMARY KEY,\n", - " name VARCHAR(255),\n", - " reign_days INTEGER,\n", - " defenses INTEGER\n", - ");\n", - "Testing select query: SELECT reign_days FROM champions WHERE reign_days > 3 AND defenses < 1;\n", - "[]\n", - "Testing create query: CREATE TABLE ChampionReigns (\n", - " id INT PRIMARY KEY,\n", - " champion_name VARCHAR(255) NOT NULL,\n", - " total_defenses INT NOT NULL,\n", - " days_held INT NOT NULL\n", - ");\n", - "Testing select query: SELECT AVG(total_defenses) AS average_defenses\n", - "FROM ChampionReigns\n", - "WHERE days_held = 404 AND TOTAL_REIGNS > 1;\n", - "Error while executing select query: no such column: TOTAL_REIGNS\n", - "Testing create query: CREATE TABLE Champions (\n", - " id INT PRIMARY KEY,\n", - " name VARCHAR(255),\n", - " days_held INT,\n", - " defense INT\n", - ");\n", - "Testing select query: SELECT MIN(defense) as lowest_defense FROM Champions WHERE days_held = 345;\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE games_records (\n", - " id INTEGER PRIMARY KEY AUTOINCREMENT,\n", - " game_date DATE NOT NULL,\n", - " team1_score INTEGER NOT NULL,\n", - " team2_score INTEGER NOT NULL\n", - ");\n", - "Testing select query: SELECT game_date FROM games_records WHERE team1_score = 76 AND team2_score = 72;\n", - "[]\n", - "Testing create query: CREATE TABLE GameResults (\n", - " game_id INT PRIMARY KEY,\n", - " pitcher_name VARCHAR(50),\n", - " pitcher_record 
VARCHAR(10),\n", - " attendance INT,\n", - " result VARCHAR(50)\n", - ");\n", - "Testing select query: SELECT attendance FROM GameResults WHERE pitcher_name = 'Ponson' AND pitcher_record = '1-5' AND result LIKE '%loss%';\n", - "[]\n", - "Testing create query: CREATE TABLE records (\n", - " id SERIAL PRIMARY KEY,\n", - " event_date DATE NOT NULL,\n", - " record_type TEXT NOT NULL,\n", - " record_value TEXT NOT NULL\n", - ");\n", - "Testing select query: SELECT event_date FROM records WHERE record_value = '36-39';\n", - "[]\n", - "Testing create query: CREATE TABLE records (\n", - " record_id INT PRIMARY KEY,\n", - " win_count INT,\n", - " loss_count INT,\n", - " record_date DATE\n", - ");\n", - "Testing select query: SELECT record_date FROM records WHERE win_count = 30 AND loss_count = 31;\n", - "[]\n", - "Testing create query: CREATE TABLE games (\n", - " id INT PRIMARY KEY,\n", - " player_name VARCHAR(255),\n", - " opponent_name VARCHAR(255),\n", - " player_score INT,\n", - " opponent_score INT\n", - ");\n", - "Testing select query: SELECT opponent_name FROM games WHERE player_name = 'Leonard' AND player_score = 7 AND opponent_score = 8;\n", - "[]\n", - "Testing create query: CREATE TABLE GameScores (\n", - " id INT PRIMARY KEY,\n", - " record VARCHAR(10),\n", - " score VARCHAR(10),\n", - " date_played DATE\n", - ");\n", - "Testing select query: SELECT score FROM GameScores WHERE record = '18–43';\n", - "[]\n", - "Testing create query: CREATE TABLE game_scores (\n", - " id INT PRIMARY KEY AUTO_INCREMENT,\n", - " game_date DATE,\n", - " opposing_team VARCHAR(50),\n", - " team_score INT,\n", - " opponent_score INT,\n", - " season_record VARCHAR(10)\n", - ");\n", - "Error while creating the SQLite table: near \"AUTO_INCREMENT\": syntax error\n", - "Testing select query: SELECT team_score, opponent_score FROM game_scores WHERE opposing_team = 'Royals' AND season_record = '24-52';\n", - "Error while executing select query: no such table: game_scores\n", - "Testing 
create query: CREATE TABLE GameRecord (\n", - " id INT PRIMARY KEY,\n", - " record VARCHAR(10),\n", - " score VARCHAR(50),\n", - " date DATE\n", - ");\n", - "Testing select query: SELECT score FROM GameRecord WHERE record = '22–46';\n", - "[]\n", - "Testing create query: CREATE TABLE MilitaryPersonnel (\n", - " id INT PRIMARY KEY,\n", - " real_name VARCHAR(255),\n", - " primary_specialty VARCHAR(255)\n", - ");\n", - "Testing select query: SELECT real_name FROM MilitaryPersonnel WHERE primary_specialty = 'shock paratrooper';\n", - "[]\n", - "Testing create query: CREATE TABLE Persons (\n", - " PersonID INT PRIMARY KEY,\n", - " FirstName VARCHAR(255) NOT NULL,\n", - " LastName VARCHAR(255) NOT NULL,\n", - " Birthplace VARCHAR(255) NOT NULL\n", - ");\n", - "Testing select query: SELECT Birthplace FROM Persons WHERE FirstName = 'Pete' AND LastName = 'Sanderson';\n", - "[]\n", - "Testing create query: CREATE TABLE roles (\n", - " id SERIAL PRIMARY KEY,\n", - " person_name VARCHAR(100) NOT NULL,\n", - " role_title VARCHAR(100) NOT NULL\n", - ");\n", - "Testing select query: SELECT role_title FROM roles WHERE person_name = 'Jean-Luc Bouvier';\n", - "[]\n", - "Testing create query: CREATE TABLE KayakPilots (\n", - " id INT PRIMARY KEY,\n", - " real_name VARCHAR(255),\n", - " nickname VARCHAR(255),\n", - " vessel_type VARCHAR(100)\n", - ");\n", - "Testing select query: SELECT real_name FROM KayakPilots WHERE vessel_type = 'silent attack kayak';\n", - "[]\n", - "Testing create query: CREATE TABLE people (\n", - " id INT PRIMARY KEY,\n", - " name VARCHAR(100),\n", - " code_name VARCHAR(100),\n", - " city_of_birth VARCHAR(100),\n", - " date_of_birth DATE\n", - ");\n", - "Testing select query: SELECT code_name FROM people WHERE city_of_birth = 'Liverpool';\n", - "[]\n", - "Testing create query: CREATE TABLE CanoeingMedalists (\n", - " id INT PRIMARY KEY,\n", - " name VARCHAR(100) NOT NULL,\n", - " event VARCHAR(100) NOT NULL,\n", - " medal_type VARCHAR(50) NOT NULL,\n", - " 
year INT NOT NULL\n", - ");\n", - "Testing select query: SELECT name FROM CanoeingMedalists WHERE event = 'Canoeing';\n", - "[]\n", - "Testing create query: CREATE TABLE HalfMiddleweightEvents (\n", - " GameID INT PRIMARY KEY,\n", - " GameName VARCHAR(255),\n", - " Event VARCHAR(50),\n", - " Year INT\n", - ");\n", - "Testing select query: SELECT GameName, Year FROM HalfMiddleweightEvents WHERE Event = 'Women\\'s Half Middleweight';\n", - "Error while executing select query: near \"s\": syntax error\n", - "Testing create query: CREATE TABLE OlympicMedalists2000 (\n", - " AthleteID INT PRIMARY KEY,\n", - " Name VARCHAR(255) NOT NULL,\n", - " MedalType VARCHAR(50) NOT NULL,\n", - " Event VARCHAR(255) NOT NULL,\n", - " Country VARCHAR(100) NOT NULL\n", - ");\n", - "Testing select query: SELECT Name, Event, Country FROM OlympicMedalists2000 WHERE MedalType = 'Bronze' AND Event = 'Specific Event Name';\n", - "[]\n", - "Testing create query: CREATE TABLE GameAttendance (\n", - " GameID INT PRIMARY KEY,\n", - " Opponent VARCHAR(50),\n", - " Attendance INT\n", - ");\n", - "Testing select query: SELECT SUM(Attendance) AS Total_Attendance\n", - "FROM GameAttendance\n", - "WHERE Opponent = 'Twins';\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE sports_records (\n", - " id SERIAL PRIMARY KEY,\n", - " date DATE NOT NULL,\n", - " record VARCHAR(255) NOT NULL\n", - ");\n", - "Testing select query: SELECT date FROM sports_records WHERE record = '41-46';\n", - "[]\n", - "Testing create query: CREATE TABLE Scores (\n", - " id INT PRIMARY KEY,\n", - " score_name VARCHAR(255),\n", - " score_value VARCHAR(10)\n", - ");\n", - "Testing select query: SELECT score_name FROM Scores WHERE score_value = '48-55';\n", - "[]\n", - "Testing create query: CREATE TABLE sports_records (\n", - " team_name VARCHAR(255),\n", - " games_won INT,\n", - " games_lost INT,\n", - " PRIMARY KEY (team_name)\n", - ");\n", - "Testing select query: SELECT team_name FROM sports_records WHERE games_won = 
44 AND games_lost = 49;\n", - "[]\n", - "Testing create query: CREATE TABLE games (\n", - " game_id INT PRIMARY KEY,\n", - " opponent VARCHAR(50),\n", - " record VARCHAR(10),\n", - " score VARCHAR(10)\n", - ");\n", - "Testing select query: SELECT score FROM games WHERE opponent = 'white sox' AND record = '2-0';\n", - "[]\n", - "Testing create query: CREATE TABLE election_votes (\n", - " candidate_name VARCHAR(255),\n", - " votes_received INT\n", - ");\n", - "Testing select query: SELECT votes_received FROM election_votes WHERE candidate_name = 'Candice Sjostrom';\n", - "[]\n", - "Testing create query: CREATE TABLE election_results (\n", - " candidate_name VARCHAR(100),\n", - " votes_received INT,\n", - " total_votes INT\n", - ");\n", - "Testing select query: SELECT (votes_received * 100.0) / total_votes AS percentage_received \n", - "FROM election_results \n", - "WHERE candidate_name = 'Chris Wright';\n", - "[]\n", - "Testing create query: CREATE TABLE election_results (\n", - " election_year INT,\n", - " candidate_name VARCHAR(255),\n", - " vote_count INT,\n", - " vote_percentage DECIMAL(5, 2),\n", - " office VARCHAR(255),\n", - " office_district INT\n", - ");\n", - "Testing select query: SELECT vote_count\n", - "FROM election_results\n", - "WHERE election_year > 1992 \n", - " AND vote_percentage = 1.59 \n", - " AND office = 'us representative' \n", - " AND office_district = 4;\n", - "[]\n", - "Testing create query: CREATE TABLE Representatives (\n", - " id SERIAL PRIMARY KEY,\n", - " first_name VARCHAR(50),\n", - " last_name VARCHAR(50),\n", - " start_year INT,\n", - " end_year INT\n", - ");\n", - "Testing select query: SELECT start_year, end_year FROM Representatives WHERE first_name = 'J.' AND last_name = 'Smith Young';\n", - "[]\n", - "Testing create query: CREATE TABLE Politicians (\n", - " id INT PRIMARY KEY,\n", - " name VARCHAR(100),\n", - " party VARCHAR(100)\n", - ");\n", - "Testing select query: SELECT party FROM Politicians WHERE name = 'Thomas L. 
Young';\n", - "[]\n", - "Testing create query: CREATE TABLE MedalCounts (\n", - " Country VARCHAR(100),\n", - " Gold INT,\n", - " Silver INT,\n", - " Bronze INT,\n", - " Total INT\n", - ");\n", - "Testing select query: SELECT MIN(Total) AS LowestMedalCount FROM MedalCounts \n", - "WHERE Gold = 0 AND Bronze > 2 AND Silver > 1;\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE country_medals (\n", - " rank INT,\n", - " country_name VARCHAR(255),\n", - " gold_medals INT,\n", - " silver_medals INT,\n", - " bronze_medals INT,\n", - " total_medals INT\n", - ");\n", - "Testing select query: SELECT SUM(silver_medals) FROM country_medals WHERE rank = 14 AND total_medals < 1;\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE player_stats (\n", - " player_id INT PRIMARY KEY,\n", - " player_name VARCHAR(100),\n", - " tackles INT,\n", - " fumble_recoveries INT,\n", - " forced_fumbles INT\n", - ");\n", - "Testing select query: SELECT tackles FROM player_stats WHERE fumble_recoveries > 0 AND forced_fumbles > 0;\n", - "[]\n", - "Testing create query: CREATE TABLE DefensiveStats (\n", - " player_id INT PRIMARY KEY,\n", - " player_name VARCHAR(100),\n", - " solo_tackles INT,\n", - " forced_fumbles INT\n", - ");\n", - "Testing select query: SELECT forced_fumbles FROM DefensiveStats WHERE player_name = 'jim laney' AND solo_tackles < 2;\n", - "[]\n", - "Testing create query: CREATE TABLE PlayersStats (\n", - " PlayerID INT PRIMARY KEY,\n", - " PlayerName VARCHAR(255),\n", - " SoloTackles INT,\n", - " Total INT\n", - ");\n", - "Testing select query: SELECT MAX(Total) AS HighTotal FROM PlayersStats WHERE SoloTackles > 15;\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE PlayerStats (\n", - " player_id INT PRIMARY KEY,\n", - " player_name VARCHAR(100),\n", - " fumble_recoveries INT,\n", - " forced_fumbles INT,\n", - " sacks INT,\n", - " solo_tackles INT\n", - ");\n", - "Testing select query: SELECT fumble_recoveries FROM PlayerStats WHERE player_name = 'Scott 
Gajos' AND forced_fumbles = 0 AND sacks = 0 AND solo_tackles < 2;\n", - "[]\n", - "Testing create query: CREATE TABLE Matches (\n", - " MatchID INT PRIMARY KEY,\n", - " HomeTeam VARCHAR(255),\n", - " OpponentTeam VARCHAR(255),\n", - " MatchTime TIME,\n", - " Stadium VARCHAR(255)\n", - ");\n", - "Testing select query: SELECT OpponentTeam FROM Matches WHERE MatchTime = '20:00:00' AND Stadium = 'Camp Nou';\n", - "[]\n", - "Testing create query: CREATE TABLE matches (\n", - " id INT PRIMARY KEY,\n", - " date DATE,\n", - " time TIME,\n", - " score VARCHAR(5)\n", - ");\n", - "Testing select query: SELECT time FROM matches WHERE score = '3-2';\n", - "[]\n", - "Testing create query: CREATE TABLE Matches (\n", - " MatchID INT PRIMARY KEY,\n", - " HomeTeam VARCHAR(255),\n", - " AwayTeam VARCHAR(255),\n", - " Ground VARCHAR(255),\n", - " MatchDate DATE\n", - ");\n", - "Testing select query: SELECT Ground FROM Matches WHERE HomeTeam = 'Aston Villa' OR AwayTeam = 'Aston Villa';\n", - "[]\n", - "Testing create query: CREATE TABLE CompetitionEvents (\n", - " EventID INT PRIMARY KEY AUTO_INCREMENT,\n", - " CompetitionName VARCHAR(100),\n", - " Location VARCHAR(100),\n", - " EventTime TIME,\n", - " EventDate DATE\n", - ");\n", - "Error while creating the SQLite table: near \"AUTO_INCREMENT\": syntax error\n", - "Testing select query: SELECT CompetitionName FROM CompetitionEvents \n", - "WHERE Location = 'San Siro' AND EventTime = '18:30:00' \n", - "ORDER BY EventDate DESC \n", - "LIMIT 1;\n", - "Error while executing select query: no such table: CompetitionEvents\n", - "Testing create query: CREATE TABLE school_locality_deciles (\n", - " locality_id INT PRIMARY KEY,\n", - " locality_name VARCHAR(255),\n", - " total_decile INT\n", - ");\n", - "Testing select query: SELECT locality_name, total_decile FROM school_locality_deciles WHERE locality_name = 'redwood';\n", - "[]\n", - "Testing create query: CREATE TABLE racing_reports (\n", - " report_id INT PRIMARY KEY,\n", - " report_name 
VARCHAR(255) NOT NULL,\n", - " track_name VARCHAR(255),\n", - " event_date DATE\n", - ");\n", - "Testing select query: SELECT report_name FROM racing_reports WHERE track_name = 'Circuit of Tripoli';\n", - "[]\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5a1c63bbf32c412b8f649d06c1a28ef4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/50 [00:00 3 AND defenses = 1;\n", - "[]\n", - "Testing create query: CREATE TABLE champions (id INT, name VARCHAR(255), days_held INT, reign_count INT, defenses INT);\n", - "Testing select query: SELECT days_held FROM champions WHERE reign_count > 3 AND defenses < 1;\n", - "[]\n", - "Testing create query: CREATE TABLE champions (id INT PRIMARY KEY, name VARCHAR(255), days_held INT, reign INT, defenses INT);\n", - "Testing select query: SELECT AVG(defenses) AS average_defenses FROM champions WHERE days_held = 404 AND reign > 1;\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE champions (id INT PRIMARY KEY, name VARCHAR(255), defense INT, days_held INT);\n", - "Testing select query: SELECT MIN(defense) AS lowest_defense FROM champions WHERE days_held = 345;\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE games (date DATE, team1_score INT, team2_score INT);\n", - "Testing select query: SELECT date FROM games WHERE (team1_score = 76 AND team2_score = 72) OR (team1_score = 72 AND team2_score = 76);\n", - "[]\n", - "Testing create query: CREATE TABLE match_results (match_id INT, team_name VARCHAR(255), player_name VARCHAR(255), attendance INT, result VARCHAR(50));\n", - "Testing select query: SELECT attendance FROM match_results WHERE player_name = 'Ponson' AND result = 'loss' AND match_id = 1;\n", - "[]\n", - "Testing create query: CREATE TABLE daily_records (id INT PRIMARY KEY, record_value INT, record_date DATE);\n", - "Testing select query: SELECT record_date FROM daily_records WHERE record_value BETWEEN 36 AND 39;\n", - "[]\n", - "Testing create 
query: CREATE TABLE records (id INTEGER PRIMARY KEY, record VARCHAR(10), date DATE);\n", - "Testing select query: SELECT date FROM records WHERE record = '30-31';\n", - "[]\n", - "Testing create query: CREATE TABLE baseball_games (game_date DATE, opponent TEXT, player_name TEXT, record TEXT, outcome TEXT);\n", - "Testing select query: SELECT opponent FROM baseball_games WHERE player_name = 'Leonard' AND record = '7-8';\n", - "[]\n", - "Testing create query: CREATE TABLE Games (id INT PRIMARY KEY, record VARCHAR(10), score VARCHAR(10));\n", - "Testing select query: SELECT score FROM Games WHERE record = '18–43';\n", - "[]\n", - "Testing create query: CREATE TABLE baseball_games (id INT PRIMARY KEY, opponent VARCHAR(50), our_score INT, their_score INT, our_record VARCHAR(10));\n", - "Testing select query: SELECT our_score, their_score FROM baseball_games WHERE opponent = 'Royals' AND our_record = '24-52';\n", - "[]\n", - "Testing create query: CREATE TABLE GameScores (id INTEGER PRIMARY KEY, record VARCHAR(10), score VARCHAR(10));\n", - "Testing select query: SELECT score FROM GameScores WHERE record = '22–46';\n", - "[]\n", - "Testing create query: CREATE TABLE MilitarySpecialty (real_name VARCHAR(100), specialty VARCHAR(100));\n", - "Testing select query: SELECT real_name FROM MilitarySpecialty WHERE specialty = 'shock paratrooper';\n", - "[]\n", - "Testing create query: CREATE TABLE People (Name VARCHAR(255), Birthplace VARCHAR(255));\n", - "Testing select query: SELECT Birthplace FROM People WHERE Name = 'Pete Sanderson';\n", - "[]\n", - "Testing create query: CREATE TABLE Roles (person_name VARCHAR(255), role VARCHAR(255));\n", - "Testing select query: SELECT role FROM Roles WHERE person_name = 'Jean-Luc Bouvier';\n", - "[]\n", - "Testing create query: CREATE TABLE KayakPilots (PilotID INT PRIMARY KEY, RealName VARCHAR(255), Alias VARCHAR(255));\n", - "Testing select query: SELECT RealName FROM KayakPilots WHERE Alias = 'silent attack kayak';\n", - "[]\n", - 
"Testing create query: CREATE TABLE people (id INT PRIMARY KEY, name VARCHAR(100), birth_city VARCHAR(100), code_name VARCHAR(100));\n", - "Testing select query: SELECT code_name FROM people WHERE birth_city = 'Liverpool';\n", - "[]\n", - "Testing create query: CREATE TABLE CanoeingMedalists (ID INT PRIMARY KEY, Name VARCHAR(100), Country VARCHAR(100), MedalType VARCHAR(50));\n", - "Testing select query: SELECT Name FROM CanoeingMedalists;\n", - "[]\n", - "Testing create query: CREATE TABLE WomensHalfMiddleweightEvents (id INT PRIMARY KEY, game_name TEXT, year INT, location TEXT);\n", - "Testing select query: SELECT game_name, year, location FROM WomensHalfMiddleweightEvents WHERE game_name IS NOT NULL;\n", - "[]\n", - "Testing create query: CREATE TABLE BronzeMedals (Year INT, Games VARCHAR(100), Sport VARCHAR(100), Event VARCHAR(100), Athlete VARCHAR(100), Country VARCHAR(100));\n", - "Testing select query: SELECT Athlete, Country FROM BronzeMedals WHERE Year = 2000 AND Games = 'Sydney';\n", - "[]\n", - "Testing create query: CREATE TABLE BaseballMatches (match_id INT PRIMARY KEY, date DATE, opponent VARCHAR(100), attendance INT);\n", - "Testing select query: SELECT COUNT(*) FROM BaseballMatches WHERE opponent = 'twins';\n", - "[(0,)]\n", - "Testing create query: CREATE TABLE Records (Date DATE, WinCount INT, LossCount INT, PRIMARY KEY (Date));\n", - "Testing select query: SELECT Date FROM Records WHERE WinCount = 41 AND LossCount = 46;\n", - "[]\n", - "Testing create query: CREATE TABLE team_records (id INTEGER PRIMARY KEY, team_name VARCHAR(100), wins INTEGER, losses INTEGER, score INTEGER);\n", - "Testing select query: SELECT score FROM team_records WHERE wins = 48 AND losses = 55;\n", - "[]\n", - "Testing create query: CREATE TABLE Scores (record VARCHAR(10), score INT);\n", - "Testing select query: SELECT score FROM Scores WHERE record = '44-49';\n", - "[]\n", - "Testing create query: CREATE TABLE GameScores (Score INT, Opponent VARCHAR(50), Record 
VARCHAR(5));\n", - "Testing select query: SELECT Score FROM GameScores WHERE Opponent = 'white sox' AND Record = '2-0';\n", - "[]\n", - "Testing create query: CREATE TABLE votes (id INT PRIMARY KEY, candidate_name VARCHAR(255), votes INT);\n", - "Testing select query: SELECT votes FROM votes WHERE candidate_name = 'candice sjostrom';\n", - "[]\n", - "Testing create query: CREATE TABLE Votes(candidate_name VARCHAR(255), received_percentage DECIMAL(5,2));\n", - "Testing select query: SELECT received_percentage FROM Votes WHERE candidate_name = 'Chris Wright';\n", - "[]\n", - "Testing create query: CREATE TABLE election_results (year INT, votes INT, percentage FLOAT, office VARCHAR(50), candidate_id INT);\n", - "Testing select query: SELECT votes FROM election_results WHERE year > 1992 AND percentage = 1.59 AND office = 'US Representative 4';\n", - "[]\n", - "Testing create query: CREATE TABLE Representatives (Name VARCHAR(100), StartYear INT, EndYear INT);\n", - "Testing select query: SELECT StartYear, EndYear FROM Representatives WHERE Name = 'J. Smith Young';\n", - "[]\n", - "Testing create query: CREATE TABLE Politicians (id INT PRIMARY KEY, name VARCHAR(255), party VARCHAR(255), term_start DATE, term_end DATE);\n", - "Testing select query: SELECT party FROM Politicians WHERE name = 'Thomas L. 
Young';\n", - "[]\n", - "Testing create query: CREATE TABLE medals (country VARCHAR(100), gold INT, silver INT, bronze INT, total INT);\n", - "Testing select query: SELECT MIN(total) FROM medals WHERE gold = 0 AND bronze > 2 AND silver > 1;\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE OlympicStats (Country VARCHAR(100), Rank INT, GoldMedals INT, SilverMedals INT, BronzeMedals INT, TotalMedals INT);\n", - "Testing select query: SELECT SUM(SilverMedals) as TotalSilverMedals FROM OlympicStats WHERE Rank = 14 AND TotalMedals < 1;\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE player_statistics (player_id INT PRIMARY KEY, player_name VARCHAR(100), number_of_tackles INT, fumble_recoveries INT, forced_fumbles INT);\n", - "Testing select query: SELECT player_name, number_of_tackles FROM player_statistics WHERE fumble_recoveries > 0 AND forced_fumbles > 0;\n", - "[]\n", - "Testing create query: CREATE TABLE ForcedFumbles (player_name VARCHAR(100), solo_tackles INT, forced_fumbles INT);\n", - "Testing select query: SELECT forced_fumbles FROM ForcedFumbles WHERE player_name = 'Jim Laney' AND solo_tackles < 2;\n", - "[]\n", - "Testing create query: CREATE TABLE PlayerStatistics (PlayerID INT PRIMARY KEY, PlayerName VARCHAR(100), SoloTackles INT, TotalTackles INT);\n", - "Testing select query: SELECT MAX(TotalTackles) AS HighTotal FROM PlayerStatistics WHERE SoloTackles > 15;\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE PlayerStats (player_name VARCHAR(50), fumble_recoveries INT, forced_fumbles INT, sacks INT, solo_tackles INT);\n", - "Testing select query: SELECT fumble_recoveries FROM PlayerStats WHERE player_name = 'Scott Gajos' AND forced_fumbles = 0 AND sacks = 0 AND solo_tackles < 2;\n", - "[]\n", - "Testing create query: CREATE TABLE matches (id INT PRIMARY KEY, home_team VARCHAR(100), opponent VARCHAR(100), date TIME, location VARCHAR(100));\n", - "Testing select query: SELECT opponent FROM matches WHERE date = '20:00:00' AND 
location = 'Camp Nou';\n", - "[]\n", - "Testing create query: CREATE TABLE matches (id INT PRIMARY KEY, match_time TIME, team1_score INT, team2_score INT);\n", - "Testing select query: SELECT match_time FROM matches WHERE team1_score = 3 AND team2_score = 2;\n", - "[]\n", - "Testing create query: CREATE TABLE Matches (id INT PRIMARY KEY, home_team VARCHAR(255), away_team VARCHAR(255), ground VARCHAR(255), date DATE);\n", - "Testing select query: SELECT ground FROM Matches WHERE away_team = 'Aston Villa';\n", - "[]\n", - "Testing create query: CREATE TABLE Competitions (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), start_time TIME, timezone VARCHAR(255));\n", - "Testing select query: SELECT name FROM Competitions WHERE location = 'San Siro' AND start_time = '18:30:00' AND timezone = 'GMT';\n", - "[]\n", - "Testing create query: CREATE TABLE locality_scores (locality_id INT PRIMARY KEY, decile INT NOT NULL, locality_name VARCHAR(100) NOT NULL);\n", - "Testing select query: SELECT SUM(decile) AS total_decile FROM locality_scores WHERE locality_name = 'redwood school';\n", - "[(None,)]\n", - "Testing create query: CREATE TABLE reports (report_id INT PRIMARY KEY, report_name VARCHAR(255), content TEXT);\n", - "Testing select query: SELECT report_name FROM reports WHERE content LIKE '%Circuit of Tripoli%';\n", - "[]\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e64c52a42f1c4202a0c820f160c1504e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/50 [00:00\n", " 1\n", " What venue did the partnership of herschelle g...\n", - " {\"create\":\"CREATE TABLE CricketPartnerships (\\...\n", + " {\"create\":\"CREATE TABLE cricket_partnerships (...\n", " True\n", " True\n", " 5\n", @@ -1878,18 +1335,18 @@ " \n", " 2\n", " What is the number Played that has 310 Points ...\n", - " {\"create\":\"CREATE TABLE PointsTable (\\n Num...\n", - " True\n", + " {\"create\":\"CREATE TABLE Scores (\\n id INT P...\n", 
" True\n", + " False\n", " 5\n", - " SQL correct\n", + " SQL incorrect\n", " 1\n", " gpt-4\n", " \n", " \n", " 3\n", " What Losing bonus has a Points against of 588?\n", - " {\"create\":\"CREATE TABLE sports_stats (\\n te...\n", + " {\"create\":\"CREATE TABLE FootballTeams (\\n T...\n", " True\n", " True\n", " 5\n", @@ -1900,7 +1357,7 @@ " \n", " 4\n", " What Tries against has a Losing bonus of 7?\n", - " {\"create\":\"CREATE TABLE rugby_points (\\n id S...\n", + " {\"create\":\"CREATE TABLE RugbyScores (\\n tea...\n", " True\n", " True\n", " 4\n", @@ -1920,22 +1377,22 @@ "3 What Losing bonus has a Points against of 588? \n", "4 What Tries against has a Losing bonus of 7? \n", "\n", - " response format sql \\\n", - "0 {\"create\":\"CREATE TABLE cricket_partnerships (... True True \n", - "1 {\"create\":\"CREATE TABLE CricketPartnerships (\\... True True \n", - "2 {\"create\":\"CREATE TABLE PointsTable (\\n Num... True True \n", - "3 {\"create\":\"CREATE TABLE sports_stats (\\n te... True True \n", - "4 {\"create\":\"CREATE TABLE rugby_points (\\n id S... True True \n", + " response format sql \\\n", + "0 {\"create\":\"CREATE TABLE cricket_partnerships (... True True \n", + "1 {\"create\":\"CREATE TABLE cricket_partnerships (... True True \n", + "2 {\"create\":\"CREATE TABLE Scores (\\n id INT P... True False \n", + "3 {\"create\":\"CREATE TABLE FootballTeams (\\n T... True True \n", + "4 {\"create\":\"CREATE TABLE RugbyScores (\\n tea... 
True True \n", "\n", " evaluation_score unit_test_evaluation run Evaluating Model \n", "0 5 SQL correct 1 gpt-4 \n", "1 5 SQL correct 1 gpt-4 \n", - "2 5 SQL correct 1 gpt-4 \n", + "2 5 SQL incorrect 1 gpt-4 \n", "3 5 SQL correct 1 gpt-4 \n", "4 4 SQL correct 1 gpt-4 " ] }, - "execution_count": 184, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1946,7 +1403,7 @@ }, { "cell_type": "code", - "execution_count": 185, + "execution_count": 37, "id": "ed800f0c", "metadata": {}, "outputs": [ @@ -1984,16 +1441,20 @@ " \n", " 1\n", " SQL correct\n", - " 46\n", + " 41\n", " \n", " \n", " SQL incorrect\n", - " 4\n", + " 9\n", " \n", " \n", - " 2\n", + " 2\n", " SQL correct\n", - " 50\n", + " 49\n", + " \n", + " \n", + " SQL incorrect\n", + " 1\n", " \n", " \n", "\n", @@ -2002,12 +1463,13 @@ "text/plain": [ " Number of records\n", "run unit_test_evaluation \n", - "1 SQL correct 46\n", - " SQL incorrect 4\n", - "2 SQL correct 50" + "1 SQL correct 41\n", + " SQL incorrect 9\n", + "2 SQL correct 49\n", + " SQL incorrect 1" ] }, - "execution_count": 185, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -2032,13 +1494,13 @@ }, { "cell_type": "code", - "execution_count": 186, + "execution_count": 38, "id": "e2b4aa03-42f5-4c30-a610-e553937bf160", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -2087,7 +1549,7 @@ }, { "cell_type": "code", - "execution_count": 187, + "execution_count": 39, "id": "7228eac7-e0a9-473d-9432-e558bbc91841", "metadata": {}, "outputs": [ @@ -2125,15 +1587,15 @@ " \n", " 1\n", " 3\n", - " 2\n", + " 1\n", " \n", " \n", " 4\n", - " 15\n", + " 13\n", " \n", " \n", " 5\n", - " 33\n", + " 36\n", " \n", " \n", " 2\n", @@ -2142,11 +1604,11 @@ " \n", " \n", " 4\n", - " 15\n", + " 13\n", " \n", " \n", " 5\n", - " 34\n", + " 36\n", " \n", " \n", "\n", @@ -2155,15 +1617,15 @@ "text/plain": [ " Number of records\n", "run evaluation_score \n", - "1 3 2\n", - " 4 15\n", - " 5 33\n", + "1 3 1\n", + " 4 13\n", + " 5 36\n", "2 3 1\n", - " 4 15\n", - " 5 34" + " 4 13\n", + " 5 36" ] }, - "execution_count": 187, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -2188,13 +1650,13 @@ }, { "cell_type": "code", - "execution_count": 188, + "execution_count": 40, "id": "b2a18a78-55ec-43f6-9d62-929707a94364", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -2204,8 +1666,6 @@ } ], "source": [ - "\n", - "\n", "# Reset index without dropping the 'run' and 'evaluation_score' columns\n", "evaluation_df_pivot.reset_index(inplace=True)\n", "\n", @@ -2268,7 +1728,7 @@ ], "metadata": { "kernelspec": { - "display_name": "myenv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -2282,7 +1742,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.11.8" } }, "nbformat": 4,