mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
20 lines
520 B
Python
20 lines
520 B
Python
from abc import ABC, abstractmethod
|
|
from typing import Any, Dict, List, Tuple
|
|
from core.models.documents import Chunk
|
|
|
|
|
|
class BaseParser(ABC):
    """Abstract interface for document parsers.

    Both coroutines below are declared abstract, so a subclass can only be
    instantiated once it overrides ``split_text`` and ``parse_file``.
    """

    @abstractmethod
    async def split_text(self, text: str) -> List[Chunk]:
        """Divide a plain-text string into chunks.

        Args:
            text: The text to split.

        Returns:
            A list of ``Chunk`` objects produced from ``text``.
        """

    @abstractmethod
    async def parse_file(
        self,
        file: bytes,
        content_type: str,
    ) -> Tuple[Dict[str, Any], List[Chunk]]:
        """Turn raw file content into text chunks.

        Args:
            file: The file's content as bytes.
            content_type: Type label for ``file`` (presumably a MIME type --
                confirm against concrete parsers).

        Returns:
            A 2-tuple of a string-keyed dictionary (presumably file-level
            metadata -- confirm against concrete parsers) and the list of
            ``Chunk`` objects extracted from the file.
        """
|