Fix image sizing for rules

2025-05-09 19:32:38 +00:00 · 2025-05-03 13:24:43 -07:00 · 2025-05-03 13:24:43 -07:00 · 9453d77848
commit 9453d77848
parent 1897485c87
2 changed files with 34 additions and 18 deletions
--- a/core/models/rules.py
+++ b/core/models/rules.py
@@ -92,16 +92,32 @@ class MetadataExtractionRule(BaseRule):

        # Adjust prompt based on whether it's a chunk or full document and whether it's an image
        if self.use_images:
+            # For image rules, we do NOT embed the base64 image data in the textual prompt.
+            # The image will be sent separately via the `image_url` entry in the vision message.
+            # Keeping the textual prompt concise avoids blowing up the context window.
+            prompt = f"""
+            Extract metadata from the following image according to this schema:
+
+            {schema_text}
+
+            The image is attached below.
+
+            Follow these guidelines:
+            1. Extract all requested information as simple strings, numbers, or booleans
+            (not as objects or nested structures)
+            2. If information is not present, indicate this with null instead of making something up
+            3. Answer directly with the requested information - don't include explanations or reasoning
+            4. Be concise but accurate in your extractions
+            """
            prompt_context = "image" if self.stage == "post_chunking" else "document with images"
        else:
            prompt_context = "chunk of text" if self.stage == "post_chunking" else "text"
-
            prompt = f"""
            Extract metadata from the following {prompt_context} according to this schema:

            {schema_text}

-        {"Image to analyze:" if self.use_images else "Text to extract from:"}
+            Text to extract from:
            {content}

            Follow these guidelines:
--- a/core/services/document_service.py
+++ b/core/services/document_service.py
@@ -744,17 +744,17 @@ class DocumentService:
                from PIL import Image as PILImage

                img = PILImage.open(BytesIO(file_content))
-                # Resize the image to a max width of 512 while preserving aspect ratio to
-                # keep the base64 payload smaller (helps avoid context window errors).
-                max_width = 512
+                # Resize and compress aggressively to minimize context window footprint
+                max_width = 256  # reduce width to shrink payload dramatically
                if img.width > max_width:
                    ratio = max_width / float(img.width)
                    new_height = int(float(img.height) * ratio)
                    img = img.resize((max_width, new_height))

                buffered = BytesIO()
-                img.save(buffered, format="PNG", optimize=True)
-                img_b64 = "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode()
+                # Save as JPEG with moderate quality instead of PNG to reduce size further
+                img.convert("RGB").save(buffered, format="JPEG", quality=70, optimize=True)
+                img_b64 = "data:image/jpeg;base64," + base64.b64encode(buffered.getvalue()).decode()
                return [Chunk(content=img_b64, metadata={"is_image": True})]
            except Exception as e:
                logger.error(f"Error resizing image for base64 encoding: {e}. Falling back to original size.")