Fix image sizing for rules

This commit is contained in:
Adityavardhan Agrawal 2025-05-03 13:24:43 -07:00
parent 1897485c87
commit 9453d77848
2 changed files with 34 additions and 18 deletions

View File

@@ -92,16 +92,32 @@ class MetadataExtractionRule(BaseRule):
# Adjust prompt based on whether it's a chunk or full document and whether it's an image
if self.use_images:
# For image rules, we do NOT embed the base64 image data in the textual prompt.
# The image will be sent separately via the `image_url` entry in the vision message.
# Keeping the textual prompt concise avoids blowing up the context window.
prompt = f"""
Extract metadata from the following image according to this schema:
{schema_text}
The image is attached below.
Follow these guidelines:
1. Extract all requested information as simple strings, numbers, or booleans
(not as objects or nested structures)
2. If information is not present, indicate this with null instead of making something up
3. Answer directly with the requested information - don't include explanations or reasoning
4. Be concise but accurate in your extractions
"""
prompt_context = "image" if self.stage == "post_chunking" else "document with images"
else:
prompt_context = "chunk of text" if self.stage == "post_chunking" else "text"
prompt = f"""
Extract metadata from the following {prompt_context} according to this schema:
{schema_text}
{"Image to analyze:" if self.use_images else "Text to extract from:"}
Text to extract from:
{content}
Follow these guidelines:

View File

@@ -744,17 +744,17 @@ class DocumentService:
from PIL import Image as PILImage
img = PILImage.open(BytesIO(file_content))
# Resize the image to a max width of 512 while preserving aspect ratio to
# keep the base64 payload smaller (helps avoid context window errors).
max_width = 512
# Resize and compress aggressively to minimize context window footprint
max_width = 256 # reduce width to shrink payload dramatically
if img.width > max_width:
ratio = max_width / float(img.width)
new_height = int(float(img.height) * ratio)
img = img.resize((max_width, new_height))
buffered = BytesIO()
img.save(buffered, format="PNG", optimize=True)
img_b64 = "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode()
# Save as JPEG with moderate quality instead of PNG to reduce size further
img.convert("RGB").save(buffered, format="JPEG", quality=70, optimize=True)
img_b64 = "data:image/jpeg;base64," + base64.b64encode(buffered.getvalue()).decode()
return [Chunk(content=img_b64, metadata={"is_image": True})]
except Exception as e:
logger.error(f"Error resizing image for base64 encoding: {e}. Falling back to original size.")