Fix image sizing for rules

This commit is contained in:
Adityavardhan Agrawal 2025-05-03 13:24:43 -07:00
parent 1897485c87
commit 9453d77848
2 changed files with 34 additions and 18 deletions

View File

@@ -92,25 +92,41 @@ class MetadataExtractionRule(BaseRule):
# Adjust prompt based on whether it's a chunk or full document and whether it's an image # Adjust prompt based on whether it's a chunk or full document and whether it's an image
if self.use_images: if self.use_images:
# For image rules, we do NOT embed the base64 image data in the textual prompt.
# The image will be sent separately via the `image_url` entry in the vision message.
# Keeping the textual prompt concise avoids blowing up the context window.
prompt = f"""
Extract metadata from the following image according to this schema:
{schema_text}
The image is attached below.
Follow these guidelines:
1. Extract all requested information as simple strings, numbers, or booleans
(not as objects or nested structures)
2. If information is not present, indicate this with null instead of making something up
3. Answer directly with the requested information - don't include explanations or reasoning
4. Be concise but accurate in your extractions
"""
prompt_context = "image" if self.stage == "post_chunking" else "document with images" prompt_context = "image" if self.stage == "post_chunking" else "document with images"
else: else:
prompt_context = "chunk of text" if self.stage == "post_chunking" else "text" prompt_context = "chunk of text" if self.stage == "post_chunking" else "text"
prompt = f"""
Extract metadata from the following {prompt_context} according to this schema:
prompt = f""" {schema_text}
Extract metadata from the following {prompt_context} according to this schema:
{schema_text} Text to extract from:
{content}
{"Image to analyze:" if self.use_images else "Text to extract from:"} Follow these guidelines:
{content} 1. Extract all requested information as simple strings, numbers, or booleans
(not as objects or nested structures)
Follow these guidelines: 2. If information is not present, indicate this with null instead of making something up
1. Extract all requested information as simple strings, numbers, or booleans 3. Answer directly with the requested information - don't include explanations or reasoning
(not as objects or nested structures) 4. Be concise but accurate in your extractions
2. If information is not present, indicate this with null instead of making something up """
3. Answer directly with the requested information - don't include explanations or reasoning
4. Be concise but accurate in your extractions
"""
# Get the model configuration from registered_models # Get the model configuration from registered_models
model_config = settings.REGISTERED_MODELS.get(settings.RULES_MODEL, {}) model_config = settings.REGISTERED_MODELS.get(settings.RULES_MODEL, {})

View File

@@ -744,17 +744,17 @@ class DocumentService:
from PIL import Image as PILImage from PIL import Image as PILImage
img = PILImage.open(BytesIO(file_content)) img = PILImage.open(BytesIO(file_content))
# Resize the image to a max width of 512 while preserving aspect ratio to # Resize and compress aggressively to minimize context window footprint
# keep the base64 payload smaller (helps avoid context window errors). max_width = 256 # reduce width to shrink payload dramatically
max_width = 512
if img.width > max_width: if img.width > max_width:
ratio = max_width / float(img.width) ratio = max_width / float(img.width)
new_height = int(float(img.height) * ratio) new_height = int(float(img.height) * ratio)
img = img.resize((max_width, new_height)) img = img.resize((max_width, new_height))
buffered = BytesIO() buffered = BytesIO()
img.save(buffered, format="PNG", optimize=True) # Save as JPEG with moderate quality instead of PNG to reduce size further
img_b64 = "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode() img.convert("RGB").save(buffered, format="JPEG", quality=70, optimize=True)
img_b64 = "data:image/jpeg;base64," + base64.b64encode(buffered.getvalue()).decode()
return [Chunk(content=img_b64, metadata={"is_image": True})] return [Chunk(content=img_b64, metadata={"is_image": True})]
except Exception as e: except Exception as e:
logger.error(f"Error resizing image for base64 encoding: {e}. Falling back to original size.") logger.error(f"Error resizing image for base64 encoding: {e}. Falling back to original size.")