diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 177039a..8cf3fa8 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -60,7 +60,7 @@ representative at an online or offline event. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at -databridgesuperuser@gmail.com. +founders@morphik.ai. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the diff --git a/DOCKER.md b/DOCKER.md index 2261b19..ca7d936 100644 --- a/DOCKER.md +++ b/DOCKER.md @@ -1,6 +1,6 @@ -# Docker Setup Guide for DataBridge Core +# Docker Setup Guide for Morphik Core -DataBridge Core provides a streamlined Docker-based setup that includes all necessary components: the core API, PostgreSQL with pgvector, and Ollama for AI models. +Morphik Core provides a streamlined Docker-based setup that includes all necessary components: the core API, PostgreSQL with pgvector, and Ollama for AI models. ## Prerequisites @@ -12,8 +12,8 @@ DataBridge Core provides a streamlined Docker-based setup that includes all nece 1. Clone the repository and navigate to the project directory: ```bash -git clone https://github.com/databridge-org/databridge-core.git -cd databridge-core +git clone https://github.com/morphik-org/morphik-core.git +cd morphik-core ``` 2. First-time setup: @@ -50,9 +50,9 @@ The default configuration works out of the box and includes: - Local file storage - Basic authentication -### 2. Configuration File (databridge.toml) +### 2. Configuration File (morphik.toml) -The default `databridge.toml` is configured for Docker and includes: +The default `morphik.toml` is configured for Docker and includes: ```toml [api] @@ -94,13 +94,13 @@ PORT=8000 # Change if needed ### 4. Custom Configuration To use your own configuration: -1. Create a custom `databridge.toml` +1. Create a custom `morphik.toml` 2. Mount it in `docker-compose.yml`: ```yaml services: - databridge: + morphik: volumes: - - ./my-custom-databridge.toml:/app/databridge.toml + - ./my-custom-morphik.toml:/app/morphik.toml ``` ## Accessing Services @@ -124,14 +124,14 @@ services: docker compose logs # View specific service logs - docker compose logs databridge + docker compose logs morphik docker compose logs postgres docker compose logs ollama ``` 2. **Database Issues** - Check PostgreSQL is healthy: `docker compose ps` - - Verify database connection: `docker compose exec postgres psql -U databridge -d databridge` + - Verify database connection: `docker compose exec postgres psql -U morphik -d morphik` 3. **Model Download Issues** - Check Ollama logs: `docker compose logs ollama` @@ -166,8 +166,8 @@ For production environments: ## Support For issues and feature requests: -- GitHub Issues: [https://github.com/databridge-org/databridge-core/issues](https://github.com/databridge-org/databridge-core/issues) -- Documentation: [https://databridge.gitbook.io/databridge-docs](https://databridge.gitbook.io/databridge-docs) +- GitHub Issues: [https://github.com/morphik-org/morphik-core/issues](https://github.com/morphik-org/morphik-core/issues) +- Documentation: [https://docs.morphik.ai](https://docs.morphik.ai) ## Repository Information diff --git a/LICENSE b/LICENSE index 494f676..f0df534 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 Morphik +Copyright (c) 2024 Morphik, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 46f4e1f..082185c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ **Note**: Morphik is launching a hosted service soon! Please sign up for the [waitlist](https://docs.google.com/forms/d/1gFoUKzECICugInLkRlAlgwrkRVorfNywAgkmcjmVGkE/edit). -[![License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/databridge-org/databridge-core/tree/main?tab=License-1-ov-file#readme) [![PyPI - Version](https://img.shields.io/pypi/v/databridge-client)](https://pypi.org/project/databridge-client/) [![Discord](https://img.shields.io/discord/1336524712817332276?logo=discord&label=discord)](https://discord.gg/BwMtv3Zaju) +[![License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/morphik-org/morphik-core/tree/main?tab=License-1-ov-file#readme) [![PyPI - Version](https://img.shields.io/pypi/v/morphik)](https://pypi.org/project/morphik/) [![Discord](https://img.shields.io/discord/1336524712817332276?logo=discord&label=discord)](https://discord.gg/BwMtv3Zaju) ## What is Morphik? @@ -72,10 +72,10 @@ python start_server.py ### Using the Python SDK ```python -from databridge import DataBridge +from morphik import Morphik -# Connect to DataBridge server -db = DataBridge("databridge://localhost:8000") +# Connect to Morphik server +db = Morphik("morphik://localhost:8000") # Ingest a document doc = db.ingest_text("This is a sample document about AI technology.", @@ -182,10 +182,10 @@ for chunk in chunks: For comprehensive documentation: -- [Installation Guide](https://databridge.mintlify.app/getting-started) -- [Core Concepts](https://databridge.mintlify.app/concepts/naive-rag) -- [Python SDK](https://databridge.mintlify.app/python-sdk/databridge) -- [API Reference](https://databridge.mintlify.app/api-reference/health-check) +- [Installation Guide](https://docs.morphik.ai/getting-started) +- [Core Concepts](https://docs.morphik.ai/concepts/naive-rag) +- [Python SDK](https://docs.morphik.ai/python-sdk/morphik) +- [API Reference](https://docs.morphik.ai/api-reference/health-check) ## License @@ -194,7 +194,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file ## Community - [Discord](https://discord.gg/BwMtv3Zaju) - Join our community -- [GitHub](https://github.com/databridge-org/databridge-core) - Contribute to development +- [GitHub](https://github.com/morphik-org/morphik-core) - Contribute to development --- diff --git a/core/api.py b/core/api.py index ea2998c..0552ae8 100644 --- a/core/api.py +++ b/core/api.py @@ -16,7 +16,7 @@ from core.models.documents import Document, DocumentResult, ChunkResult from core.models.graph import Graph from core.models.auth import AuthContext, EntityType from core.models.prompts import validate_prompt_overrides_with_http_exception -from core.parser.databridge_parser import DatabridgeParser +from core.parser.morphik_parser import MorphikParser from core.services.document_service import DocumentService from core.services.telemetry import TelemetryService from core.config import get_settings @@ -171,7 +171,7 @@ match settings.STORAGE_PROVIDER: raise ValueError(f"Unsupported storage provider: {settings.STORAGE_PROVIDER}") # Initialize parser -parser = DatabridgeParser( +parser = MorphikParser( chunk_size=settings.CHUNK_SIZE, chunk_overlap=settings.CHUNK_OVERLAP, use_unstructured_api=settings.USE_UNSTRUCTURED_API, @@ -1234,14 +1234,14 @@ async def generate_local_uri( token = jwt.encode(payload, settings.JWT_SECRET_KEY, algorithm=settings.JWT_ALGORITHM) # Read config for host/port - with open("databridge.toml", "rb") as f: + with open("morphik.toml", "rb") as f: config = tomli.load(f) base_url = f"{config['api']['host']}:{config['api']['port']}".replace( "localhost", "127.0.0.1" ) # Generate URI - uri = f"databridge://{name}:{token}@{base_url}" + uri = f"morphik://{name}:{token}@{base_url}" return {"uri": uri} except Exception as e: logger.error(f"Error generating local URI: {e}") diff --git a/core/config.py b/core/config.py index d2226c2..4c515e6 100644 --- a/core/config.py +++ b/core/config.py @@ -8,7 +8,7 @@ from collections import ChainMap class Settings(BaseSettings): - """DataBridge configuration settings.""" + """Morphik configuration settings.""" # Environment variables JWT_SECRET_KEY: str @@ -100,7 +100,7 @@ class Settings(BaseSettings): HONEYCOMB_ENABLED: bool = True HONEYCOMB_ENDPOINT: str = "https://api.honeycomb.io" HONEYCOMB_PROXY_ENDPOINT: str = "https://otel-proxy.onrender.com/" - SERVICE_NAME: str = "databridge-core" + SERVICE_NAME: str = "morphik-core" OTLP_TIMEOUT: int = 10 OTLP_MAX_RETRIES: int = 3 OTLP_RETRY_DELAY: int = 1 @@ -115,7 +115,7 @@ def get_settings() -> Settings: load_dotenv(override=True) # Load config.toml - with open("databridge.toml", "rb") as f: + with open("morphik.toml", "rb") as f: config = tomli.load(f) em = "'{missing_value}' needed if '{field}' is set to '{value}'" @@ -277,10 +277,10 @@ def get_settings() -> Settings: raise ValueError("'model' is required in the rules configuration") rules_config["RULES_MODEL"] = config["rules"]["model"] - # load databridge config - databridge_config = { - "ENABLE_COLPALI": config["databridge"]["enable_colpali"], - "MODE": config["databridge"].get("mode", "cloud"), # Default to "cloud" mode + # load morphik config + morphik_config = { + "ENABLE_COLPALI": config["morphik"]["enable_colpali"], + "MODE": config["morphik"].get("mode", "cloud"), # Default to "cloud" mode } # load graph config @@ -301,7 +301,7 @@ def get_settings() -> Settings: "TELEMETRY_ENABLED": config["telemetry"].get("enabled", True), "HONEYCOMB_ENABLED": config["telemetry"].get("honeycomb_enabled", True), "HONEYCOMB_ENDPOINT": config["telemetry"].get("honeycomb_endpoint", "https://api.honeycomb.io"), - "SERVICE_NAME": config["telemetry"].get("service_name", "databridge-core"), + "SERVICE_NAME": config["telemetry"].get("service_name", "morphik-core"), "OTLP_TIMEOUT": config["telemetry"].get("otlp_timeout", 10), "OTLP_MAX_RETRIES": config["telemetry"].get("otlp_max_retries", 3), "OTLP_RETRY_DELAY": config["telemetry"].get("otlp_retry_delay", 1), @@ -322,7 +322,7 @@ def get_settings() -> Settings: storage_config, vector_store_config, rules_config, - databridge_config, + morphik_config, graph_config, telemetry_config, openai_config, diff --git a/core/logging_config.py b/core/logging_config.py index d5f6a1c..26ea4e0 100644 --- a/core/logging_config.py +++ b/core/logging_config.py @@ -31,7 +31,7 @@ def setup_logging(log_level: str = "INFO"): console_handler.setLevel(level) # File handler - file_handler = logging.FileHandler(log_dir / "databridge.log") + file_handler = logging.FileHandler(log_dir / "morphik.log") file_handler.setFormatter(console_formatter) file_handler.setLevel(level) diff --git a/core/parser/databridge_parser.py b/core/parser/morphik_parser.py similarity index 99% rename from core/parser/databridge_parser.py rename to core/parser/morphik_parser.py index 6d5a181..01888d2 100644 --- a/core/parser/databridge_parser.py +++ b/core/parser/morphik_parser.py @@ -131,7 +131,7 @@ class ContextualChunker(BaseChunker): return contextualized_chunks -class DatabridgeParser(BaseParser): +class MorphikParser(BaseParser): """Unified parser that handles different file types and chunking strategies""" def __init__( @@ -178,7 +178,7 @@ class DatabridgeParser(BaseParser): video_path = temp_file.name try: - # Load the config to get the frame_sample_rate from databridge.toml + # Load the config to get the frame_sample_rate from morphik.toml config = load_config() parser_config = config.get("parser", {}) vision_config = parser_config.get("vision", {}) diff --git a/core/parser/video/parse_video.py b/core/parser/video/parse_video.py index 020a1cc..3baadfa 100644 --- a/core/parser/video/parse_video.py +++ b/core/parser/video/parse_video.py @@ -17,7 +17,7 @@ def debug_object(title, obj): def load_config() -> Dict[str, Any]: - config_path = os.path.join(os.path.dirname(__file__), "../../../databridge.toml") + config_path = os.path.join(os.path.dirname(__file__), "../../../morphik.toml") with open(config_path, "rb") as f: return tomli.load(f) diff --git a/core/services/user_service.py b/core/services/user_service.py index 9c3169e..49b167b 100644 --- a/core/services/user_service.py +++ b/core/services/user_service.py @@ -231,6 +231,6 @@ class UserService: # Generate URI with API domain api_domain = getattr(self.settings, "API_DOMAIN", "api.morphik.ai") - uri = f"databridge://{name}:{token}@{api_domain}" + uri = f"morphik://{name}:{token}@{api_domain}" return uri diff --git a/core/storage/s3_storage.py b/core/storage/s3_storage.py index d193325..615e71d 100644 --- a/core/storage/s3_storage.py +++ b/core/storage/s3_storage.py @@ -22,7 +22,7 @@ class S3Storage(BaseStorage): aws_access_key: str, aws_secret_key: str, region_name: str = "us-east-2", - default_bucket: str = "databridge-storage", + default_bucket: str = "morphik-storage", ): self.default_bucket = default_bucket self.s3_client = boto3.client( diff --git a/core/tests/__init__.py b/core/tests/__init__.py index e32287d..36c9365 100644 --- a/core/tests/__init__.py +++ b/core/tests/__init__.py @@ -10,7 +10,7 @@ def setup_test_logging(): This function configures logging specifically for test runs to: - Suppress verbose logs from LiteLLM and other external libraries - Show only warnings and errors from these libraries - - Keep INFO level for DataBridge core components + - Keep INFO level for Morphik core components """ # Configure root logger root_logger = logging.getLogger() diff --git a/core/tests/integration/test_api.py b/core/tests/integration/test_api.py index b6f1298..2d293c1 100644 --- a/core/tests/integration/test_api.py +++ b/core/tests/integration/test_api.py @@ -51,7 +51,7 @@ async def setup_test_environment(event_loop): # Create a test text file text_file = TEST_DATA_DIR / "test.txt" if not text_file.exists(): - text_file.write_text("This is a test document for DataBridge testing.") + text_file.write_text("This is a test document for Morphik testing.") # Create a small test PDF if it doesn't exist pdf_file = TEST_DATA_DIR / "test.pdf" diff --git a/docker-compose.yml b/docker-compose.yml index 89678c1..410a07d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,7 +4,7 @@ x-ollama-check: &ollama-check # This command reads the toml file and checks if any provider is set to "ollama" command: > /bin/sh -c ' - grep -q "provider *= *\"ollama\"" databridge.toml && + grep -q "provider *= *\"ollama\"" morphik.toml && echo "true" > /tmp/needs_ollama || echo "false" > /tmp/needs_ollama' @@ -12,24 +12,24 @@ services: config-check: image: alpine volumes: - - ./databridge.toml:/databridge.toml + - ./morphik.toml:/morphik.toml <<: *ollama-check - databridge: + morphik: build: . ports: - "8000:8000" environment: - JWT_SECRET_KEY=${JWT_SECRET_KEY:-your-secret-key-here} - - POSTGRES_URI=postgresql+asyncpg://databridge:databridge@postgres:5432/databridge - - PGPASSWORD=databridge + - POSTGRES_URI=postgresql+asyncpg://morphik:morphik@postgres:5432/morphik + - PGPASSWORD=morphik - HOST=0.0.0.0 - PORT=8000 - LOG_LEVEL=DEBUG volumes: - ./storage:/app/storage - ./logs:/app/logs - - ./databridge.toml:/app/databridge.toml + - ./morphik.toml:/app/morphik.toml - huggingface_cache:/root/.cache/huggingface depends_on: postgres: @@ -40,7 +40,7 @@ services: condition: service_started required: false networks: - - databridge-network + - morphik-network env_file: - .env @@ -50,22 +50,22 @@ services: dockerfile: postgres.dockerfile shm_size: 128mb environment: - - POSTGRES_USER=databridge - - POSTGRES_PASSWORD=databridge - - POSTGRES_DB=databridge + - POSTGRES_USER=morphik + - POSTGRES_PASSWORD=morphik + - POSTGRES_DB=morphik - PGDATA=/var/lib/postgresql/data/pgdata ports: - "5432:5432" volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready -U databridge -d databridge"] + test: ["CMD-SHELL", "pg_isready -U morphik -d morphik"] interval: 10s timeout: 5s retries: 5 start_period: 10s networks: - - databridge-network + - morphik-network ollama: image: ollama/ollama:latest @@ -77,11 +77,11 @@ services: - ollama_data:/root/.ollama - ./ollama-entrypoint.sh:/ollama-entrypoint.sh networks: - - databridge-network + - morphik-network entrypoint: ["/ollama-entrypoint.sh"] networks: - databridge-network: + morphik-network: driver: bridge volumes: diff --git a/docs/telemetry.md b/docs/telemetry.md index d5e1c05..13111e9 100644 --- a/docs/telemetry.md +++ b/docs/telemetry.md @@ -1,6 +1,6 @@ -# DataBridge Telemetry +# Morphik Telemetry -DataBridge includes an anonymous telemetry system to help us understand how the library is being used and to improve its functionality. We take privacy very seriously and ensure that no personally identifiable information (PII) is ever collected. +Morphik includes an anonymous telemetry system to help us understand how the library is being used and to improve its functionality. We take privacy very seriously and ensure that no personally identifiable information (PII) is ever collected. ## What We Collect @@ -42,7 +42,7 @@ os.environ["DATABRIDGE_TELEMETRY_ENABLED"] = "0" All telemetry data is: - Stored securely - Automatically anonymized before transmission -- Used only for improving DataBridge +- Used only for improving Morphik - Never shared with third parties - Retained for a maximum of 90 days @@ -57,7 +57,7 @@ You can inspect the telemetry data being collected by looking at the local log f ## Why We Collect Telemetry This data helps us: -1. Understand how DataBridge is used in real-world scenarios +1. Understand how Morphik is used in real-world scenarios 2. Identify performance bottlenecks 3. Prioritize features and improvements 4. Fix bugs faster @@ -67,5 +67,5 @@ This data helps us: If you have any questions or concerns about our telemetry collection, please: 1. Open an issue on our GitHub repository -2. Email us at privacy@databridge.dev +2. Email us at founders@morphik.ai 3. Review our telemetry implementation in `core/services/telemetry.py` \ No newline at end of file diff --git a/examples/basic_operations.py b/examples/basic_operations.py index 6a6de71..6d0b82b 100644 --- a/examples/basic_operations.py +++ b/examples/basic_operations.py @@ -1,17 +1,17 @@ import os from dotenv import load_dotenv -from databridge import DataBridge +from morphik import Morphik # Load environment variables load_dotenv() -# Connect to DataBridge -db = DataBridge(os.getenv("DATABRIDGE_URI"), timeout=10000, is_local=True) +# Connect to Morphik +db = Morphik(os.getenv("MORPHIK_URI"), timeout=10000, is_local=True) # Basic text ingestion text_doc = db.ingest_text( - "DataBridge is an open-source database designed for AI applications that simplifies working with unstructured data.", - metadata={"category": "tech", "author": "DataBridge"} + "Morphik is an open-source database designed for AI applications that simplifies working with unstructured data.", + metadata={"category": "tech", "author": "Morphik"} ) print(f"Ingested text document with ID: {text_doc.external_id}") @@ -24,7 +24,7 @@ print(f"Ingested file with ID: {file_doc.external_id}") # Basic retrieval chunks = db.retrieve_chunks( - query="What is DataBridge?", + query="What is Morphik?", k=3 ) @@ -34,6 +34,6 @@ for chunk in chunks: print(f"Score: {chunk.score}\n") # Basic query with RAG -response = db.query("What is DataBridge and what is it used for?") +response = db.query("What is Morphik and what is it used for?") print("Query response:") print(response.completion) \ No newline at end of file diff --git a/examples/batch_operations.py b/examples/batch_operations.py index 4f2961d..5497f1d 100644 --- a/examples/batch_operations.py +++ b/examples/batch_operations.py @@ -1,13 +1,13 @@ import os import tempfile from dotenv import load_dotenv -from databridge import DataBridge +from morphik import Morphik # Load environment variables load_dotenv() -# Connect to DataBridge -db = DataBridge(os.getenv("DATABRIDGE_URI"), timeout=10000, is_local=True) +# Connect to Morphik +db = Morphik(os.getenv("MORPHIK_URI"), timeout=10000, is_local=True) # Create some sample text files for batch ingestion def create_sample_files(): diff --git a/examples/cache_augmented_generation.py b/examples/cache_augmented_generation.py index 027a594..882ee77 100644 --- a/examples/cache_augmented_generation.py +++ b/examples/cache_augmented_generation.py @@ -1,12 +1,12 @@ import os from dotenv import load_dotenv -from databridge import DataBridge +from morphik import Morphik # Load environment variables load_dotenv() -# Connect to DataBridge -db = DataBridge(os.getenv("DATABRIDGE_URI"), timeout=10000, is_local=True) +# Connect to Morphik +db = Morphik(os.getenv("MORPHIK_URI"), timeout=10000, is_local=True) # Sample document for demonstration long_document = """ diff --git a/examples/colpali.py b/examples/colpali.py index c0ca443..bb7a3ff 100644 --- a/examples/colpali.py +++ b/examples/colpali.py @@ -1,13 +1,13 @@ import io -from databridge import DataBridge +from morphik import Morphik import os from dotenv import load_dotenv from PIL import Image load_dotenv() -## Connect to the DataBridge instance -db = DataBridge(os.getenv("DATABRIDGE_URI"), timeout=10000, is_local=True) +## Connect to the Morphik instance +db = Morphik(os.getenv("MORPHIK_URI"), timeout=10000, is_local=True) ## Ingestion Pathway db.ingest_file("examples/assets/colpali_example.pdf", use_colpali=True) @@ -22,6 +22,6 @@ for chunk in chunks: else: print(chunk.content) -# You can also directly query a VLM as defined in `databridge.toml` +# You can also directly query a VLM as defined in the configuration response = db.query("At what frequency do we achieve the highest Image Rejection Ratio?", use_colpali=True, k=3) print(response.completion) diff --git a/examples/knowledge_graphs.py b/examples/knowledge_graphs.py index 29612a0..9479d8b 100644 --- a/examples/knowledge_graphs.py +++ b/examples/knowledge_graphs.py @@ -1,12 +1,12 @@ import os from dotenv import load_dotenv -from databridge import DataBridge +from morphik import Morphik # Load environment variables load_dotenv() -# Connect to DataBridge -db = DataBridge(os.getenv("DATABRIDGE_URI"), timeout=10000, is_local=True) +# Connect to Morphik +db = Morphik(os.getenv("MORPHIK_URI"), timeout=10000, is_local=True) # First, ensure we have some documents to work with sample_texts = [ diff --git a/examples/rules_engine.py b/examples/rules_engine.py index 5cf0b1f..1845a47 100644 --- a/examples/rules_engine.py +++ b/examples/rules_engine.py @@ -1,14 +1,14 @@ import os from dotenv import load_dotenv -from databridge import DataBridge -from databridge.rules import MetadataExtractionRule, NaturalLanguageRule +from morphik import Morphik +from morphik.rules import MetadataExtractionRule, NaturalLanguageRule from pydantic import BaseModel # Load environment variables load_dotenv() -# Connect to DataBridge -db = DataBridge(os.getenv("DATABRIDGE_URI"), timeout=10000, is_local=True) +# Connect to Morphik +db = Morphik(os.getenv("MORPHIK_URI"), timeout=10000, is_local=True) # Define sample text with information we want to extract sample_text = """ diff --git a/databridge.toml b/morphik.toml similarity index 98% rename from databridge.toml rename to morphik.toml index 9f9bd50..52ef376 100644 --- a/databridge.toml +++ b/morphik.toml @@ -77,7 +77,7 @@ storage_path = "./storage" # [storage] # provider = "aws-s3" # region = "us-east-2" -# bucket_name = "databridge-s3-storage" +# bucket_name = "morphik-s3-storage" [vector_store] provider = "pgvector" @@ -86,7 +86,7 @@ provider = "pgvector" model = "ollama_llama" batch_size = 4096 -[databridge] +[morphik] enable_colpali = true mode = "self_hosted" # "cloud" or "self_hosted" diff --git a/quick_setup.py b/quick_setup.py index e0de6fb..bb9afb6 100644 --- a/quick_setup.py +++ b/quick_setup.py @@ -38,11 +38,11 @@ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(messag console_handler.setFormatter(formatter) LOGGER.addHandler(console_handler) -# Load configuration from databridge.toml -config_path = Path("databridge.toml") +# Load configuration from morphik.toml +config_path = Path("morphik.toml") with open(config_path, "rb") as f: CONFIG = tomli.load(f) - LOGGER.info("Loaded configuration from databridge.toml") + LOGGER.info("Loaded configuration from morphik.toml") # Extract configuration values STORAGE_PROVIDER = CONFIG["storage"]["provider"] diff --git a/sdks/python/PUBLISH.md b/sdks/python/PUBLISH.md index 20b37b5..8e41794 100644 --- a/sdks/python/PUBLISH.md +++ b/sdks/python/PUBLISH.md @@ -1,7 +1,7 @@ # Publish to PyPI - `cd` into the `sdks/python` directory -- Update the package version in `pyproject.toml`, `databridge/__init__.py`. +- Update the package version in `pyproject.toml`, `morphik/__init__.py`. - Ensure you have the correct PyPI API key/certificates/ssh keys installed ```bash diff --git a/sdks/python/README.md b/sdks/python/README.md index 9c09a1b..8584246 100644 --- a/sdks/python/README.md +++ b/sdks/python/README.md @@ -5,14 +5,14 @@ A Python client for Morphik API that enables document ingestion and semantic sea ## Installation ```bash -pip install databridge-client +pip install morphik ``` ```python -from databridge import DataBridge +from morphik import Morphik # Initialize client -db = DataBridge("your-api-key") +db = Morphik("your-api-key") # Ingest a document doc_id = await db.ingest_document( diff --git a/sdks/python/databridge/__init__.py b/sdks/python/databridge/__init__.py deleted file mode 100644 index 84eb99b..0000000 --- a/sdks/python/databridge/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -DataBridge Python SDK for document ingestion and querying. -""" - -from .sync import DataBridge -from .async_ import AsyncDataBridge -from .models import Document - -__all__ = [ - "DataBridge", - "AsyncDataBridge", - "Document", -] - -__version__ = "0.3.0" diff --git a/sdks/python/databridge/exceptions.py b/sdks/python/databridge/exceptions.py deleted file mode 100644 index a4de662..0000000 --- a/sdks/python/databridge/exceptions.py +++ /dev/null @@ -1,16 +0,0 @@ -class DataBridgeError(Exception): - """Base exception for DataBridge SDK""" - - pass - - -class AuthenticationError(DataBridgeError): - """Authentication related errors""" - - pass - - -class ConnectionError(DataBridgeError): - """Connection related errors""" - - pass diff --git a/sdks/python/morphik/__init__.py b/sdks/python/morphik/__init__.py new file mode 100644 index 0000000..2cd6621 --- /dev/null +++ b/sdks/python/morphik/__init__.py @@ -0,0 +1,15 @@ +""" +Morphik Python SDK for document ingestion and querying. +""" + +from .sync import Morphik +from .async_ import AsyncMorphik +from .models import Document + +__all__ = [ + "Morphik", + "AsyncMorphik", + "Document", +] + +__version__ = "0.1.0" diff --git a/sdks/python/databridge/async_.py b/sdks/python/morphik/async_.py similarity index 98% rename from sdks/python/databridge/async_.py rename to sdks/python/morphik/async_.py index e91e92a..a70d6f8 100644 --- a/sdks/python/databridge/async_.py +++ b/sdks/python/morphik/async_.py @@ -36,7 +36,7 @@ RuleOrDict = Union[Rule, Dict[str, Any]] class AsyncCache: - def __init__(self, db: "AsyncDataBridge", name: str): + def __init__(self, db: "AsyncMorphik", name: str): self._db = db self._name = name @@ -74,12 +74,12 @@ class FinalChunkResult(BaseModel): arbitrary_types_allowed = True -class AsyncDataBridge: +class AsyncMorphik: """ - DataBridge client for document operations. + Morphik client for document operations. Args: - uri (str, optional): DataBridge URI in format "databridge://:@". + uri (str, optional): Morphik URI in format "morphik://:@". If not provided, connects to http://localhost:8000 without authentication. timeout (int, optional): Request timeout in seconds. Defaults to 30. is_local (bool, optional): Whether to connect to a local server. Defaults to False. @@ -87,11 +87,11 @@ class AsyncDataBridge: Examples: ```python # Without authentication - async with AsyncDataBridge() as db: + async with AsyncMorphik() as db: doc = await db.ingest_text("Sample content") # With authentication - async with AsyncDataBridge("databridge://owner_id:token@api.databridge.ai") as db: + async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db: doc = await db.ingest_text("Sample content") ``` """ @@ -179,7 +179,7 @@ class AsyncDataBridge: use_colpali: bool = True, ) -> Document: """ - Ingest a text document into DataBridge. + Ingest a text document into Morphik. Args: content: Text content to ingest @@ -193,7 +193,7 @@ class AsyncDataBridge: Example: ```python - from databridge.rules import MetadataExtractionRule, NaturalLanguageRule + from morphik.rules import MetadataExtractionRule, NaturalLanguageRule from pydantic import BaseModel class DocumentInfo(BaseModel): @@ -233,7 +233,7 @@ class AsyncDataBridge: rules: Optional[List[RuleOrDict]] = None, use_colpali: bool = True, ) -> Document: - """Ingest a file document into DataBridge.""" + """Ingest a file document into Morphik.""" # Handle different file input types if isinstance(file, (str, Path)): file_path = Path(file) @@ -276,7 +276,7 @@ class AsyncDataBridge: parallel: bool = True, ) -> List[Document]: """ - Ingest multiple files into DataBridge. + Ingest multiple files into Morphik. Args: files: List of files to ingest (path strings, bytes, file objects, or Paths) @@ -350,7 +350,7 @@ class AsyncDataBridge: parallel: bool = True, ) -> List[Document]: """ - Ingest all files in a directory into DataBridge. + Ingest all files in a directory into Morphik. Args: directory: Path to directory containing files to ingest @@ -559,7 +559,7 @@ class AsyncDataBridge: ) # With prompt customization - from databridge.models import QueryPromptOverride, QueryPromptOverrides + from morphik.models import QueryPromptOverride, QueryPromptOverrides response = await db.query( "What are the key findings?", prompt_overrides=QueryPromptOverrides( @@ -1061,7 +1061,7 @@ class AsyncDataBridge: ] # Or using ChunkSource objects - from databridge.models import ChunkSource + from morphik.models import ChunkSource sources = [ ChunkSource(document_id="doc_123", chunk_number=0), ChunkSource(document_id="doc_456", chunk_number=2) @@ -1223,7 +1223,7 @@ class AsyncDataBridge: ) # With custom entity extraction examples - from databridge.models import EntityExtractionPromptOverride, EntityExtractionExample, GraphPromptOverrides + from morphik.models import EntityExtractionPromptOverride, EntityExtractionExample, GraphPromptOverrides graph = await db.create_graph( name="medical_graph", filters={"category": "medical"}, @@ -1324,7 +1324,7 @@ class AsyncDataBridge: print(f"Graph now has {len(updated_graph.entities)} entities") # With entity resolution examples - from databridge.models import EntityResolutionPromptOverride, EntityResolutionExample, GraphPromptOverrides + from morphik.models import EntityResolutionPromptOverride, EntityResolutionExample, GraphPromptOverrides updated_graph = await db.update_graph( name="research_graph", additional_documents=["doc4"], diff --git a/sdks/python/morphik/exceptions.py b/sdks/python/morphik/exceptions.py new file mode 100644 index 0000000..4701264 --- /dev/null +++ b/sdks/python/morphik/exceptions.py @@ -0,0 +1,16 @@ +class MorphikError(Exception): + """Base exception for Morphik SDK""" + + pass + + +class AuthenticationError(MorphikError): + """Authentication related errors""" + + pass + + +class ConnectionError(MorphikError): + """Connection related errors""" + + pass diff --git a/sdks/python/databridge/models.py b/sdks/python/morphik/models.py similarity index 100% rename from sdks/python/databridge/models.py rename to sdks/python/morphik/models.py diff --git a/sdks/python/databridge/rules.py b/sdks/python/morphik/rules.py similarity index 100% rename from sdks/python/databridge/rules.py rename to sdks/python/morphik/rules.py diff --git a/sdks/python/databridge/sync.py b/sdks/python/morphik/sync.py similarity index 97% rename from sdks/python/databridge/sync.py rename to sdks/python/morphik/sync.py index 22544ea..f05e258 100644 --- a/sdks/python/databridge/sync.py +++ b/sdks/python/morphik/sync.py @@ -39,7 +39,7 @@ RuleOrDict = Union[Rule, Dict[str, Any]] class Cache: - def __init__(self, db: "DataBridge", name: str): + def __init__(self, db: "Morphik", name: str): self._db = db self._name = name @@ -77,12 +77,12 @@ class FinalChunkResult(BaseModel): arbitrary_types_allowed = True -class DataBridge: +class Morphik: """ - DataBridge client for document operations. + Morphik client for document operations. Args: - uri (str, optional): DataBridge URI in format "databridge://:@". + uri (str, optional): Morphik URI in format "morphik://:@". If not provided, connects to http://localhost:8000 without authentication. timeout (int, optional): Request timeout in seconds. Defaults to 30. is_local (bool, optional): Whether connecting to local development server. Defaults to False. @@ -90,10 +90,10 @@ class DataBridge: Examples: ```python # Without authentication - db = DataBridge() + db = Morphik() # With authentication - db = DataBridge("databridge://owner_id:token@api.databridge.ai") + db = Morphik("morphik://owner_id:token@api.morphik.ai") ``` """ @@ -175,7 +175,7 @@ class DataBridge: use_colpali: bool = True, ) -> Document: """ - Ingest a text document into DataBridge. + Ingest a text document into Morphik. Args: content: Text content to ingest @@ -189,7 +189,7 @@ class DataBridge: Example: ```python - from databridge.rules import MetadataExtractionRule, NaturalLanguageRule + from morphik.rules import MetadataExtractionRule, NaturalLanguageRule from pydantic import BaseModel class DocumentInfo(BaseModel): @@ -230,7 +230,7 @@ class DataBridge: use_colpali: bool = True, ) -> Document: """ - Ingest a file document into DataBridge. + Ingest a file document into Morphik. Args: file: File to ingest (path string, bytes, file object, or Path) @@ -246,7 +246,7 @@ class DataBridge: Example: ```python - from databridge.rules import MetadataExtractionRule, NaturalLanguageRule + from morphik.rules import MetadataExtractionRule, NaturalLanguageRule from pydantic import BaseModel class DocumentInfo(BaseModel): @@ -314,7 +314,7 @@ class DataBridge: parallel: bool = True, ) -> List[Document]: """ - Ingest multiple files into DataBridge. + Ingest multiple files into Morphik. Args: files: List of files to ingest (path strings, bytes, file objects, or Paths) @@ -388,7 +388,7 @@ class DataBridge: parallel: bool = True, ) -> List[Document]: """ - Ingest all files in a directory into DataBridge. + Ingest all files in a directory into Morphik. Args: directory: Path to directory containing files to ingest @@ -595,7 +595,7 @@ class DataBridge: ) # With prompt customization - from databridge.models import QueryPromptOverride, QueryPromptOverrides + from morphik.models import QueryPromptOverride, QueryPromptOverrides response = db.query( "What are the key findings?", prompt_overrides=QueryPromptOverrides( @@ -1095,7 +1095,7 @@ class DataBridge: ] # Or using ChunkSource objects - from databridge.models import ChunkSource + from morphik.models import ChunkSource sources = [ ChunkSource(document_id="doc_123", chunk_number=0), ChunkSource(document_id="doc_456", chunk_number=2) @@ -1254,7 +1254,7 @@ class DataBridge: ) # With custom entity extraction examples - from databridge.models import EntityExtractionPromptOverride, EntityExtractionExample, GraphPromptOverrides + from morphik.models import EntityExtractionPromptOverride, EntityExtractionExample, GraphPromptOverrides graph = db.create_graph( name="medical_graph", filters={"category": "medical"}, @@ -1355,7 +1355,7 @@ class DataBridge: print(f"Graph now has {len(updated_graph.entities)} entities") # With entity resolution examples - from databridge.models import EntityResolutionPromptOverride, EntityResolutionExample, GraphPromptOverrides + from morphik.models import EntityResolutionPromptOverride, EntityResolutionExample, GraphPromptOverrides updated_graph = db.update_graph( name="research_graph", additional_documents=["doc4"], diff --git a/sdks/python/pyproject.toml b/sdks/python/pyproject.toml index dcc6431..25d0b2a 100644 --- a/sdks/python/pyproject.toml +++ b/sdks/python/pyproject.toml @@ -3,12 +3,12 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "databridge-client" -version = "0.3.0" +name = "morphik" +version = "0.1.0" authors = [ - { name = "DataBridge", email = "databridgesuperuser@gmail.com" }, + { name = "Morphik", email = "founders@morphik.ai" }, ] -description = "DataBridge Python Client" +description = "Morphik Python Client" readme = "README.md" requires-python = ">=3.8" dependencies = [ @@ -20,9 +20,9 @@ dependencies = [ ] [tool.hatch.build.targets.wheel] -packages = ["databridge"] +packages = ["morphik"] [tool.hatch.build.targets.sdist] include = [ - "/databridge", + "/morphik", ] diff --git a/shell.py b/shell.py index a621965..5aa9bf1 100644 --- a/shell.py +++ b/shell.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ -DataBridge interactive CLI. -Assumes a DataBridge server is running. +Morphik interactive CLI. +Assumes a Morphik server is running. Usage: Without authentication (connects to localhost): @@ -9,7 +9,7 @@ Usage: With authentication: python shell.py - Example: python shell.py "databridge://user:token@localhost:8000" + Example: python shell.py "morphik://user:token@localhost:8000" This provides the exact same interface as the Python SDK: db.ingest_text("content", metadata={...}) @@ -29,18 +29,18 @@ _SDK_PATH = str(Path(__file__).parent / "sdks" / "python") if _SDK_PATH not in sys.path: sys.path.insert(0, _SDK_PATH) -from databridge import DataBridge # noqa: E402 -from databridge.models import Document # noqa: E402 +from morphik import Morphik # noqa: E402 +from morphik.models import Document # noqa: E402 class DB: def __init__(self, uri: str = None): - """Initialize DataBridge with optional URI""" - self._client = DataBridge(uri, is_local=True, timeout=1000) + """Initialize Morphik with optional URI""" + self._client = Morphik(uri, is_local=True, timeout=1000) self.base_url = "http://localhost:8000" # For health check only def check_health(self, max_retries=30, retry_interval=1) -> bool: - """Check if DataBridge server is healthy with retries""" + """Check if Morphik server is healthy with retries""" health_url = f"{self.base_url}/health" for attempt in range(max_retries): @@ -68,7 +68,7 @@ class DB: as_object: bool = False, ) -> Union[dict, "Document"]: """ - Ingest text content into DataBridge. + Ingest text content into Morphik. Args: content: Text content to ingest @@ -104,7 +104,7 @@ class DB: as_object: bool = False, ) -> Union[dict, "Document"]: """ - Ingest a file into DataBridge. + Ingest a file into Morphik. Args: file: Path to file to ingest @@ -147,7 +147,7 @@ class DB: as_objects: bool = False, ) -> List[Union[dict, "Document"]]: """ - Batch ingest multiple files into DataBridge. + Batch ingest multiple files into Morphik. Args: files: List of file paths to ingest @@ -207,7 +207,7 @@ class DB: as_objects: bool = False, ) -> List[Union[dict, "Document"]]: """ - Ingest all files in a directory into DataBridge. + Ingest all files in a directory into Morphik. Args: directory: Path to directory containing files to ingest @@ -855,10 +855,10 @@ if __name__ == "__main__": # Check server health if not db.check_health(): - print("Error: Could not connect to DataBridge server") + print("Error: Could not connect to Morphik server") sys.exit(1) - print("\nConnected to DataBridge") + print("\nConnected to Morphik") # Start an interactive Python shell with 'db' already imported import code @@ -871,7 +871,7 @@ if __name__ == "__main__": shell = code.InteractiveConsole(locals()) # Print welcome message - print("\nDataBridge CLI ready to use. The 'db' object is available with all SDK methods.") + print("\nMorphik CLI ready to use. The 'db' object is available with all SDK methods.") print("Examples:") print(" db.ingest_text('hello world')") print(" db.query('what are the key findings?')") diff --git a/start_server.py b/start_server.py index e466c5c..f98ff51 100644 --- a/start_server.py +++ b/start_server.py @@ -22,7 +22,7 @@ def check_ollama_running(base_url): def get_ollama_usage_info(): """Check if Ollama is required based on the configuration file and get base URLs.""" try: - with open("databridge.toml", "rb") as f: + with open("morphik.toml", "rb") as f: config = tomli.load(f) ollama_configs = [] @@ -81,7 +81,7 @@ def get_ollama_usage_info(): def main(): # Parse command line arguments - parser = argparse.ArgumentParser(description="Start the DataBridge server") + parser = argparse.ArgumentParser(description="Start the Morphik server") parser.add_argument( "--log", choices=["debug", "info", "warning", "error"],