From 7d5a3c481f9399c977922a6339e06365ee716a18 Mon Sep 17 00:00:00 2001 From: "Paul S. Schweigert" Date: Mon, 18 May 2026 17:05:01 -0400 Subject: [PATCH] async bom building Signed-off-by: Paul S. Schweigert --- notebooks/atai_2026/tutorial.ipynb | 79 +----------------------------- 1 file changed, 2 insertions(+), 77 deletions(-) diff --git a/notebooks/atai_2026/tutorial.ipynb b/notebooks/atai_2026/tutorial.ipynb index 4d4a8fa..676cb08 100644 --- a/notebooks/atai_2026/tutorial.ipynb +++ b/notebooks/atai_2026/tutorial.ipynb @@ -217,82 +217,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from typing import Literal\n", - "from mellea.core import ModelOutputThunk\n", - "from mellea.stdlib.components.docs.richdocument import Table\n", - "from mellea.stdlib.requirements import req, simple_validate\n", - "import pydantic\n", - "\n", - "\n", - "class BOMEntry(pydantic.BaseModel):\n", - " item: str\n", - " quantity: int | str\n", - " notes: str\n", - " category: Literal[\"lumber\", \"windows\", \"doors\", \"other\"]\n", - "\n", - "class BOM(pydantic.BaseModel):\n", - " items: list[BOMEntry]\n", - "\n", - "def _bom_entry_is_well_formed(entry: BOMEntry) -> bool:\n", - " \"\"\"Checks that the BOMEntry quantity is either an integer or 'allowance'.\"\"\"\n", - " try:\n", - " int(entry.quantity)\n", - " return True\n", - " except ValueError as e:\n", - " if entry.quantity.lower() == \"allowance\":\n", - " return True\n", - " return False\n", - "\n", - "def _bom_entries_are_well_formed(s: str) -> bool:\n", - " try:\n", - " bom = BOM.model_validate_json(s)\n", - " return all([_bom_entry_is_well_formed(entry) for entry in bom.items])\n", - " except pydantic.ValidationError as e:\n", - " print(f\"Failed on table: {s}\")\n", - " return False\n", - "\n", - "# Filter out tables that are not lists of construction items.\n", - "@mellea.generative\n", - "def is_material_list(table_markdown: str) -> Literal[\"yes\", \"no\"]:\n", - " \"\"\"Determines if the table 
contains a list of construction items.\"\"\"\n", - "\n", - "async def extract_bom(doc: RichDocument):\n", - " bom_routines = list()\n", - " # Fire off async requests for each table.\n", - " for table in doc.get_tables():\n", - " if is_material_list(m, table_markdown=table.to_markdown()) == \"yes\":\n", - " next_sub_bom = m.ainstruct(\n", - " \"Reformat this table to have four columns: item, quantity, type, and notes (optional).\",\n", - " grounding_context={'table': table.to_markdown()},\n", - " requirements=[\n", - " req(\n", - " \"Quantity row should only contain an integer or Allowance\",\n", - " validation_fn=simple_validate(_bom_entries_are_well_formed)\n", - " ),\n", - " req(\n", - " \"type should be one of: lumber, windows, doors, other\",\n", - " validation_fn=simple_validate(lambda x: True)\n", - " ), # note: this is enforced by the Literal type so no check is required.\n", - " ],\n", - " format=BOM\n", - " )\n", - " bom_routines.append(next_sub_bom)\n", - " \n", - " # wait for all of the async work to finish, then concatenate the results.\n", - " bom_thunks: list[ModelOutputThunk] = [await bom_routine for bom_routine in bom_routines]\n", - " boms = [BOM.model_validate_json(await bom_thunk.avalue()) for bom_thunk in bom_thunks]\n", - " \n", - " # Concatente all of the indiviual BOMs into one large list.\n", - " all_items = []\n", - " for bom in boms:\n", - " all_items.extend(bom.items)\n", - " full_bom = BOM(items=all_items)\n", - " return full_bom\n", - "\n", - "bom = None\n", - "bom = await extract_bom(doc=construction_plans)" - ] + "source": "import asyncio\nfrom typing import Literal\nfrom mellea.core import ModelOutputThunk\nfrom mellea.stdlib.components.docs.richdocument import Table\nfrom mellea.stdlib.requirements import req, simple_validate\nimport pydantic\n\n\nclass BOMEntry(pydantic.BaseModel):\n item: str\n quantity: int | str\n notes: str\n category: Literal[\"lumber\", \"windows\", \"doors\", \"other\"]\n\nclass BOM(pydantic.BaseModel):\n 
items: list[BOMEntry]\n\ndef _bom_entry_is_well_formed(entry: BOMEntry) -> bool:\n \"\"\"Checks that the BOMEntry quantity is either an integer or 'allowance'.\"\"\"\n try:\n int(entry.quantity)\n return True\n except ValueError as e:\n if entry.quantity.lower() == \"allowance\":\n return True\n return False\n\ndef _bom_entries_are_well_formed(s: str) -> bool:\n try:\n bom = BOM.model_validate_json(s)\n return all([_bom_entry_is_well_formed(entry) for entry in bom.items])\n except pydantic.ValidationError as e:\n print(f\"Failed on table: {s}\")\n return False\n\n# Filter out tables that are not lists of construction items.\n@mellea.generative\ndef is_material_list(table_markdown: str) -> Literal[\"yes\", \"no\"]:\n \"\"\"Determines if the table contains a list of construction items.\"\"\"\n\nasync def extract_bom(doc: RichDocument):\n bom_routines = list()\n # Fire off async requests for each table.\n for table in doc.get_tables():\n if is_material_list(m, table_markdown=table.to_markdown()) == \"yes\":\n next_sub_bom = m.ainstruct(\n \"Reformat this table to have four columns: item, quantity, type, and notes (optional).\",\n grounding_context={'table': table.to_markdown()},\n requirements=[\n req(\n \"Quantity row should only contain an integer or Allowance\",\n validation_fn=simple_validate(_bom_entries_are_well_formed)\n ),\n req(\n \"type should be one of: lumber, windows, doors, other\",\n validation_fn=simple_validate(lambda x: True)\n ), # note: this is enforced by the Literal type so no check is required.\n ],\n format=BOM\n )\n bom_routines.append(next_sub_bom)\n \n # wait for all of the async work to finish in parallel, then concatenate the results.\n bom_thunks: list[ModelOutputThunk] = await asyncio.gather(*bom_routines)\n boms = [BOM.model_validate_json(await bom_thunk.avalue()) for bom_thunk in bom_thunks]\n \n # Concatenate all of the individual BOMs into one large list.\n all_items = []\n for bom in boms:\n all_items.extend(bom.items)\n full_bom = 
BOM(items=all_items)\n return full_bom\n\nbom = None\nbom = await extract_bom(doc=construction_plans)" }, { "cell_type": "markdown", @@ -666,4 +591,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file