morphik-core/core/completion/ollama_completion.py

from core.completion.base_completion import (
    BaseCompletionModel,
    CompletionRequest,
    CompletionResponse,
)
from ollama import AsyncClient


class OllamaCompletionModel(BaseCompletionModel):
    """Ollama completion model implementation"""

    def __init__(self, model_name: str, base_url: str):
        self.model_name = model_name
        self.client = AsyncClient(host=base_url)

    async def complete(self, request: CompletionRequest) -> CompletionResponse:
        """Generate completion using Ollama API"""
        # Construct prompt with context
        context = "\n\n".join(request.context_chunks)
        prompt = f"""You are a helpful assistant. Use the provided context to answer questions accurately.

Context:
{context}

Question: {request.query}"""

        # Call Ollama API
        response = await self.client.chat(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            options={
                "num_predict": request.max_tokens,
                "temperature": request.temperature,
            },
        )

        # Ollama doesn't provide token usage info, so we estimate based on characters
        completion_text = response["message"]["content"]
        char_to_token_ratio = 4  # Rough estimate: ~4 characters per token
        estimated_prompt_tokens = len(prompt) // char_to_token_ratio
        estimated_completion_tokens = len(completion_text) // char_to_token_ratio

        return CompletionResponse(
            completion=completion_text,
            usage={
                "prompt_tokens": estimated_prompt_tokens,
                "completion_tokens": estimated_completion_tokens,
                "total_tokens": estimated_prompt_tokens + estimated_completion_tokens,
            },
        )
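

# ---------------------------------------------------------------------------
# Usage sketch (illustration only, not part of the original module). It assumes
# an Ollama server is reachable at http://localhost:11434 with a pulled
# "llama3" model, and that CompletionRequest accepts the fields read in
# complete() above (query, context_chunks, max_tokens, temperature); adjust
# the model name, URL, and request schema to match the actual configuration.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        # Hypothetical values; the real model name and host come from app config
        model = OllamaCompletionModel(model_name="llama3", base_url="http://localhost:11434")
        request = CompletionRequest(
            query="What does this service use Ollama for?",
            context_chunks=["Ollama serves as a local completion backend."],
            max_tokens=256,
            temperature=0.7,
        )
        response = await model.complete(request)
        print(response.completion)
        print(response.usage)

    asyncio.run(_demo())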