mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
Fix image sizing for rules
This commit is contained in:
parent
1897485c87
commit
9453d77848
@ -92,25 +92,41 @@ class MetadataExtractionRule(BaseRule):
|
|||||||
|
|
||||||
# Adjust prompt based on whether it's a chunk or full document and whether it's an image
|
# Adjust prompt based on whether it's a chunk or full document and whether it's an image
|
||||||
if self.use_images:
|
if self.use_images:
|
||||||
|
# For image rules, we do NOT embed the base64 image data in the textual prompt.
|
||||||
|
# The image will be sent separately via the `image_url` entry in the vision message.
|
||||||
|
# Keeping the textual prompt concise avoids blowing up the context window.
|
||||||
|
prompt = f"""
|
||||||
|
Extract metadata from the following image according to this schema:
|
||||||
|
|
||||||
|
{schema_text}
|
||||||
|
|
||||||
|
The image is attached below.
|
||||||
|
|
||||||
|
Follow these guidelines:
|
||||||
|
1. Extract all requested information as simple strings, numbers, or booleans
|
||||||
|
(not as objects or nested structures)
|
||||||
|
2. If information is not present, indicate this with null instead of making something up
|
||||||
|
3. Answer directly with the requested information - don't include explanations or reasoning
|
||||||
|
4. Be concise but accurate in your extractions
|
||||||
|
"""
|
||||||
prompt_context = "image" if self.stage == "post_chunking" else "document with images"
|
prompt_context = "image" if self.stage == "post_chunking" else "document with images"
|
||||||
else:
|
else:
|
||||||
prompt_context = "chunk of text" if self.stage == "post_chunking" else "text"
|
prompt_context = "chunk of text" if self.stage == "post_chunking" else "text"
|
||||||
|
prompt = f"""
|
||||||
|
Extract metadata from the following {prompt_context} according to this schema:
|
||||||
|
|
||||||
prompt = f"""
|
{schema_text}
|
||||||
Extract metadata from the following {prompt_context} according to this schema:
|
|
||||||
|
|
||||||
{schema_text}
|
Text to extract from:
|
||||||
|
{content}
|
||||||
|
|
||||||
{"Image to analyze:" if self.use_images else "Text to extract from:"}
|
Follow these guidelines:
|
||||||
{content}
|
1. Extract all requested information as simple strings, numbers, or booleans
|
||||||
|
(not as objects or nested structures)
|
||||||
Follow these guidelines:
|
2. If information is not present, indicate this with null instead of making something up
|
||||||
1. Extract all requested information as simple strings, numbers, or booleans
|
3. Answer directly with the requested information - don't include explanations or reasoning
|
||||||
(not as objects or nested structures)
|
4. Be concise but accurate in your extractions
|
||||||
2. If information is not present, indicate this with null instead of making something up
|
"""
|
||||||
3. Answer directly with the requested information - don't include explanations or reasoning
|
|
||||||
4. Be concise but accurate in your extractions
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Get the model configuration from registered_models
|
# Get the model configuration from registered_models
|
||||||
model_config = settings.REGISTERED_MODELS.get(settings.RULES_MODEL, {})
|
model_config = settings.REGISTERED_MODELS.get(settings.RULES_MODEL, {})
|
||||||
|
@ -744,17 +744,17 @@ class DocumentService:
|
|||||||
from PIL import Image as PILImage
|
from PIL import Image as PILImage
|
||||||
|
|
||||||
img = PILImage.open(BytesIO(file_content))
|
img = PILImage.open(BytesIO(file_content))
|
||||||
# Resize the image to a max width of 512 while preserving aspect ratio to
|
# Resize and compress aggressively to minimize context window footprint
|
||||||
# keep the base64 payload smaller (helps avoid context window errors).
|
max_width = 256 # reduce width to shrink payload dramatically
|
||||||
max_width = 512
|
|
||||||
if img.width > max_width:
|
if img.width > max_width:
|
||||||
ratio = max_width / float(img.width)
|
ratio = max_width / float(img.width)
|
||||||
new_height = int(float(img.height) * ratio)
|
new_height = int(float(img.height) * ratio)
|
||||||
img = img.resize((max_width, new_height))
|
img = img.resize((max_width, new_height))
|
||||||
|
|
||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
img.save(buffered, format="PNG", optimize=True)
|
# Save as JPEG with moderate quality instead of PNG to reduce size further
|
||||||
img_b64 = "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode()
|
img.convert("RGB").save(buffered, format="JPEG", quality=70, optimize=True)
|
||||||
|
img_b64 = "data:image/jpeg;base64," + base64.b64encode(buffered.getvalue()).decode()
|
||||||
return [Chunk(content=img_b64, metadata={"is_image": True})]
|
return [Chunk(content=img_b64, metadata={"is_image": True})]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error resizing image for base64 encoding: {e}. Falling back to original size.")
|
logger.error(f"Error resizing image for base64 encoding: {e}. Falling back to original size.")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user