Fix image sizing for rules

2025-05-09 19:32:38 +00:00 · 2025-05-03 13:24:43 -07:00 · 2025-05-03 13:24:43 -07:00 · 9453d77848
commit 9453d77848
parent 1897485c87
2 changed files with 34 additions and 18 deletions
--- a/core/models/rules.py
+++ b/core/models/rules.py
@@ -92,25 +92,41 @@ class MetadataExtractionRule(BaseRule):
        # Adjust prompt based on whether it's a chunk or full document and whether it's an image
        if self.use_images:
            # For image rules, we do NOT embed the base64 image data in the textual prompt.
            # The image will be sent separately via the `image_url` entry in the vision message.
            # Keeping the textual prompt concise avoids blowing up the context window.
            prompt = f"""
            Extract metadata from the following image according to this schema:
            {schema_text}
            The image is attached below.
            Follow these guidelines:
            1. Extract all requested information as simple strings, numbers, or booleans
            (not as objects or nested structures)
            2. If information is not present, indicate this with null instead of making something up
            3. Answer directly with the requested information - don't include explanations or reasoning
            4. Be concise but accurate in your extractions
            """
            prompt_context = "image" if self.stage == "post_chunking" else "document with images"
        else:
            prompt_context = "chunk of text" if self.stage == "post_chunking" else "text"
            prompt = f"""
            Extract metadata from the following {prompt_context} according to this schema:
-        prompt = f"""
+            {schema_text}
        Extract metadata from the following {prompt_context} according to this schema:
-        {schema_text}
+            Text to extract from:
            {content}
-        {"Image to analyze:" if self.use_images else "Text to extract from:"}
+            Follow these guidelines:
-        {content}
+            1. Extract all requested information as simple strings, numbers, or booleans
-
+            (not as objects or nested structures)
-        Follow these guidelines:
+            2. If information is not present, indicate this with null instead of making something up
-        1. Extract all requested information as simple strings, numbers, or booleans
+            3. Answer directly with the requested information - don't include explanations or reasoning
-        (not as objects or nested structures)
+            4. Be concise but accurate in your extractions
-        2. If information is not present, indicate this with null instead of making something up
+            """
        3. Answer directly with the requested information - don't include explanations or reasoning
        4. Be concise but accurate in your extractions
        """
        # Get the model configuration from registered_models
        model_config = settings.REGISTERED_MODELS.get(settings.RULES_MODEL, {})
--- a/core/services/document_service.py
+++ b/core/services/document_service.py
@@ -744,17 +744,17 @@ class DocumentService:
                from PIL import Image as PILImage
                img = PILImage.open(BytesIO(file_content))
-                # Resize the image to a max width of 512 while preserving aspect ratio to
+                # Resize and compress aggressively to minimize context window footprint
-                # keep the base64 payload smaller (helps avoid context window errors).
+                max_width = 256  # reduce width to shrink payload dramatically
                max_width = 512
                if img.width > max_width:
                    ratio = max_width / float(img.width)
                    new_height = int(float(img.height) * ratio)
                    img = img.resize((max_width, new_height))
                buffered = BytesIO()
-                img.save(buffered, format="PNG", optimize=True)
+                # Save as JPEG with moderate quality instead of PNG to reduce size further
-                img_b64 = "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode()
+                img.convert("RGB").save(buffered, format="JPEG", quality=70, optimize=True)
                img_b64 = "data:image/jpeg;base64," + base64.b64encode(buffered.getvalue()).decode()
                return [Chunk(content=img_b64, metadata={"is_image": True})]
            except Exception as e:
                logger.error(f"Error resizing image for base64 encoding: {e}. Falling back to original size.")