# morphik-core/core/parser/base_parser.py
# 2025-01-01 09:18:23 -05:00
#
# 20 lines
# 531 B
# Python

from abc import ABC, abstractmethod
from typing import Any, Dict, List, Tuple
from core.models.chunk import Chunk
class BaseParser(ABC):
    """Abstract base class for document parsing.

    Subclasses must override both ``split_text`` and ``parse_file``; until
    they do, the class cannot be instantiated because both methods are
    declared with ``@abstractmethod``.
    """

    @abstractmethod
    async def split_text(self, text: str) -> List[Chunk]:
        """Split plain text into chunks.

        Args:
            text: The plain text to split.

        Returns:
            The list of chunks produced from ``text``.
        """

    @abstractmethod
    async def parse_file(
        self, file: bytes, content_type: str, filename: str
    ) -> Tuple[Dict[str, Any], List[Chunk]]:
        """Parse file content into text chunks.

        Args:
            file: Raw bytes of the file.
            content_type: Content type of the file, as a string.
            filename: Name of the file.

        Returns:
            A ``(info, chunks)`` tuple: a string-keyed dict and the list of
            chunks parsed from the file.
            NOTE(review): the dict presumably carries document-level
            metadata -- confirm against the concrete parsers.
        """